diff --git a/Makefile b/Makefile index 1580df774..da7cb8eae 100644 --- a/Makefile +++ b/Makefile @@ -86,9 +86,14 @@ o/$(MODE): \ o/$(MODE)/examples \ o/$(MODE)/third_party +ifneq ($(LANDLOCKMAKE_VERSION),) +ifeq ($(wildcard /usr/bin/ape),) +$(error please run ape/apeinstall.sh if you intend to use landlock make) +endif ifeq ($(USE_SYSTEM_TOOLCHAIN),) .STRICT = 1 endif +endif .PLEDGE = stdio rpath wpath cpath fattr proc .UNVEIL = \ diff --git a/ape/ape.S b/ape/ape.S index 8fb9a9198..62c82d01a 100644 --- a/ape/ape.S +++ b/ape/ape.S @@ -610,7 +610,7 @@ apesh: .ascii "\n@\n#'\"\n" // sixth edition shebang // extract the loader into a temp folder, and use it to // load the APE without modifying it. .ascii "[ x\"$1\" != x--assimilate ] && {\n" - .ascii "t=\"${TMPDIR:-${HOME:-.}}/.ape-1.1\"\n" + .ascii "t=\"${TMPDIR:-${HOME:-.}}/.ape-1.3\"\n" .ascii "[ -x \"$t\" ] || {\n" .ascii "mkdir -p \"${t%/*}\" &&\n" .ascii "dd if=\"$o\" of=\"$t.$$\" skip=" @@ -622,8 +622,8 @@ apesh: .ascii "\n@\n#'\"\n" // sixth edition shebang .ascii "[ -d /Applications ] && " .ascii "dd if=\"$t.$$\"" .ascii " of=\"$t.$$\"" - .ascii " skip=6" - .ascii " count=6" + .ascii " skip=5" + .ascii " count=8" .ascii " bs=64" .ascii " conv=notrunc" .ascii " 2>/dev/null\n" @@ -801,17 +801,18 @@ ape_loader_end: .long PT_NOTE .long PF_R .stub ape_note_offset,quad - .stub ape_note_vaddr,quad - .stub ape_note_paddr,quad + .quad 0 + .quad 0 .stub ape_note_filesz,quad .stub ape_note_memsz,quad - .stub ape_note_align,quad + .quad 4 #endif .previous #endif /* SupportsSystemv() || SupportsMetal() */ #if SupportsOpenbsd() .section .note.openbsd.ident,"a",@progbits + .balign 4 openbsd.ident: .long 2f-1f .long 4f-3f @@ -826,6 +827,7 @@ openbsd.ident: #if SupportsNetbsd() .section .note.netbsd.ident,"a",@progbits + .balign 4 netbsd.ident: .long 2f-1f .long 4f-3f diff --git a/ape/ape.lds b/ape/ape.lds index 405de1ee1..cb0dfbec2 100644 --- a/ape/ape.lds +++ b/ape/ape.lds @@ -581,11 +581,8 @@ ape_stack_memsz2 = ape_stack_memsz * 2; ape_stack_align = 16; ape_note_offset = ape_cod_offset + (ape_note - ape_cod_vaddr); -ape_note_vaddr = ape_note; -ape_note_paddr = ape_cod_paddr + ape_note_offset; ape_note_filesz = ape_note_end - ape_note; ape_note_memsz = ape_note_filesz; -ape_note_align = __SIZEOF_POINTER__; ape_text_offset = ape_cod_offset + LOADADDR(.text) - ape_cod_paddr; ape_text_paddr = LOADADDR(.text); @@ -665,13 +662,10 @@ CHURN(ape_elf_shoff); CHURN(ape_elf_shstrndx); CHURN(ape_macho_end); CHURN(ape_note); -CHURN(ape_note_align); CHURN(ape_note_end); CHURN(ape_note_filesz); CHURN(ape_note_memsz); CHURN(ape_note_offset); -CHURN(ape_note_paddr); -CHURN(ape_note_vaddr); CHURN(ape_ram_align); CHURN(ape_ram_filesz); CHURN(ape_ram_memsz); diff --git a/ape/ape.mk b/ape/ape.mk index 40739d700..6432fcf6f 100644 --- a/ape/ape.mk +++ b/ape/ape.mk @@ -181,41 +181,41 @@ o/$(MODE)/ape/ape-copy-self.o: \ -DAPE_NO_MODIFY_SELF $< o/$(MODE)/ape/loader.o: ape/loader.c - @$(COMPILE) -AOBJECTIFY.c $(CC) -DSUPPORT_VECTOR=0b01111001 -g $(APE_LOADER_FLAGS) + @$(COMPILE) -AOBJECTIFY.c $(CC) -DSUPPORT_VECTOR=121 -g $(APE_LOADER_FLAGS) o/$(MODE)/ape/loader-gcc.asm: ape/loader.c - @$(COMPILE) -AOBJECTIFY.c $(CC) -DSUPPORT_VECTOR=0b01111001 -S -g0 $(APE_LOADER_FLAGS) + @$(COMPILE) -AOBJECTIFY.c $(CC) -DSUPPORT_VECTOR=121 -S -g0 $(APE_LOADER_FLAGS) o/$(MODE)/ape/loader-clang.asm: ape/loader.c - @$(COMPILE) -AOBJECTIFY.c $(CLANG) -DSUPPORT_VECTOR=0b01111001 -S -g0 $(APE_LOADER_FLAGS) + @$(COMPILE) -AOBJECTIFY.c $(CLANG) -DSUPPORT_VECTOR=121 -S -g0 $(APE_LOADER_FLAGS) o/$(MODE)/ape/loader-xnu.o: ape/loader.c - @$(COMPILE) -AOBJECTIFY.c $(CC) -DSUPPORT_VECTOR=0b00001000 -g $(APE_LOADER_FLAGS) + @$(COMPILE) -AOBJECTIFY.c $(CC) -DSUPPORT_VECTOR=8 -g $(APE_LOADER_FLAGS) o/$(MODE)/ape/loader-xnu-gcc.asm: ape/loader.c - @$(COMPILE) -AOBJECTIFY.c $(CC) -DSUPPORT_VECTOR=0b00001000 -S -g0 $(APE_LOADER_FLAGS) + @$(COMPILE) -AOBJECTIFY.c $(CC) -DSUPPORT_VECTOR=8 -S -g0 $(APE_LOADER_FLAGS) o/$(MODE)/ape/loader-xnu-clang.asm: ape/loader.c - @$(COMPILE) -AOBJECTIFY.c $(CLANG) -DSUPPORT_VECTOR=0b00001000 -S -g0 $(APE_LOADER_FLAGS) + @$(COMPILE) -AOBJECTIFY.c $(CLANG) -DSUPPORT_VECTOR=8 -S -g0 $(APE_LOADER_FLAGS) o/$(MODE)/ape/ape.elf: o/$(MODE)/ape/ape.elf.dbg -o/$(MODE)/ape/ape.macho: o/$(MODE)/ape/ape.macho.dbg + @$(COMPILE) -AOBJBINCOPY -w build/bootstrap/objbincopy.com -f -o $@ $< + +o/$(MODE)/ape/ape.macho: o/$(MODE)/ape/ape.elf.dbg + @$(COMPILE) -AOBJBINCOPY -w build/bootstrap/objbincopy.com -fm -o $@ $< APE_LOADER_LDFLAGS = \ -static \ - -no-pie \ -nostdlib \ --no-dynamic-linker \ - -zcommon-page-size=0x1000 \ - -zmax-page-size=0x1000 + -z separate-code \ + -z common-page-size=0x1000 \ + -z max-page-size=0x10000 o/$(MODE)/ape/ape.elf.dbg: \ - o/$(MODE)/ape/loader.o \ - o/$(MODE)/ape/loader-elf.o \ - ape/loader.lds - @$(COMPILE) -ALINK.elf $(LINK) -T ape/loader.lds $(APE_LOADER_LDFLAGS) -o $@ o/$(MODE)/ape/loader-elf.o o/$(MODE)/ape/loader.o - -o/$(MODE)/ape/ape.macho.dbg: \ - o/$(MODE)/ape/loader-xnu.o \ o/$(MODE)/ape/loader-macho.o \ + o/$(MODE)/ape/start.o \ + o/$(MODE)/ape/loader.o \ + o/$(MODE)/ape/launch.o \ + o/$(MODE)/ape/systemcall.o \ ape/loader.lds - @$(COMPILE) -ALINK.elf $(LINK) -T ape/loader.lds $(APE_LOADER_LDFLAGS) -o $@ o/$(MODE)/ape/loader-macho.o o/$(MODE)/ape/loader-xnu.o + @$(COMPILE) -ALINK.elf $(LINK) $(APE_LOADER_LDFLAGS) -o $@ $(patsubst %.lds,-T %.lds,$^) .PHONY: o/$(MODE)/ape o/$(MODE)/ape: $(APE_CHECKS) \ diff --git a/ape/apeuninstall.sh b/ape/apeuninstall.sh index a1526ad89..49889aac6 100755 --- a/ape/apeuninstall.sh +++ b/ape/apeuninstall.sh @@ -35,6 +35,7 @@ rm -f o/tmp/ape /tmp/ape "${TMPDIR:-/tmp}/ape" # ad-hoc installations for x in .ape \ .ape-1.1 \ + .ape-1.3 \ .ape-blink-0.9.2 \ .ape-blink-1.0.0; do rm -f \ diff --git a/ape/launch.S b/ape/launch.S new file mode 100644 index 000000000..581f66996 --- /dev/null +++ b/ape/launch.S @@ -0,0 +1,53 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.internal.h" + +// Calls _start() function of loaded program. +// +// When the program entrypoint is called, all registers shall be +// cleared, with the exception of (1) %rdi will be equal to %rsp +// on FreeBSD and (2) %cl will contain the detected host OS code +// +// We clear all the general registers we can to have some wiggle +// room, to extend the behavior of this loader in the future. We +// don't need to clear the XMM registers because your APE loader +// should be compiled using gcc/clang's -mgeneral-regs-only flag +// +// @param rdi is passed through as-is +// @param rsi is address of entrypoint (becomes zero) +// @param rdx is stack pointer (becomes zero) +// @param rcx is passed through as-is +// @noreturn +Launch: xor %r8d,%r8d + xor %r9d,%r9d + xor %r10d,%r10d + xor %r11d,%r11d + xor %r12d,%r12d + xor %r13d,%r13d + xor %r14d,%r14d + xor %r15d,%r15d + mov %rdx,%rsp + xor %edx,%edx + push %rsi + xor %esi,%esi + xor %ebp,%ebp + xor %ebx,%ebx + xor %eax,%eax + ret + .endfn Launch,globl diff --git a/ape/loader-elf.S b/ape/loader-elf.S deleted file mode 100644 index 37a4086e3..000000000 --- a/ape/loader-elf.S +++ /dev/null @@ -1,199 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2021 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/elf/def.h" -#include "libc/sysv/consts/prot.h" -#include "libc/macho.internal.h" -#include "libc/dce.h" -#include "libc/macros.internal.h" - -// APE Loader Executable Structure -// Linux, FreeBSD, NetBSD, OpenBSD - - .section .head - - .balign 8 -ehdr: .ascii "\177ELF" - .byte ELFCLASS64 - .byte ELFDATA2LSB - .byte 1 - .byte ELFOSABI_FREEBSD - .quad 0 - .word ET_EXEC // e_type - .word EM_NEXGEN32E // e_machine - .long 1 // e_version - .quad _start // e_entry - .quad phdrs - ehdr // e_phoff - .quad 0 // e_shoff - .long 0 // e_flags - .word 64 // e_ehsize - .word 56 // e_phentsize - .word 4 // e_phnum - .word 0 // e_shentsize - .word 0 // e_shnum - .word 0 // e_shstrndx - .endobj ehdr,globl - - .balign 8 -phdrs: .long PT_LOAD // p_type - .long PF_R // p_flags - .quad 0 // p_offset - .quad ehdr // p_vaddr - .quad ehdr // p_paddr - .quad rosize // p_filesz - .quad rosize // p_memsz - .quad 4096 // p_align - - .long PT_LOAD // p_type - .long PF_X // p_flags - .quad rosize // p_offset - .quad text // p_vaddr - .quad text // p_paddr - .quad textsz // p_filesz - .quad textsz // p_memsz - .quad 4096 // p_align - - .long PT_GNU_STACK // p_type - .long PF_R|PF_W // p_flags - .quad 0 // p_offset - .quad 0 // p_vaddr - .quad 0 // p_paddr - .quad 0 // p_filesz - .quad 8*1024*1024 // p_memsz - .quad 16 // p_align - - .long PT_NOTE // p_type - .long PF_R // p_flags - .quad note - ehdr // p_offset - .quad note // p_vaddr - .quad note // p_paddr - .quad notesize // p_filesz - .quad notesize // p_memsz - .quad 8 // p_align - .endobj phdrs - -note: .long 2f-1f - .long 4f-3f - .long 1 -1: .asciz "OpenBSD" -2: .balign 4 -3: .long 0 -4: .long 2f-1f - .long 4f-3f - .long 1 -1: .asciz "NetBSD" -2: .balign 4 -3: .long 901000000 -4: .endobj note - notesize = . - note - - .balign 64,0 // for ape.S dd - .org 64*6 // for ape.S dd - -// APE Loader XNU Header -// -// This header is dd'd backwards by the APE shell script when -// running on Mac OS X. -// -// @see ape/ape.S -macho: .long 0xFEEDFACE+1 - .long MAC_CPU_NEXGEN32E - .long MAC_CPU_NEXGEN32E_ALL - .long MAC_EXECUTE - .long 4 // number of load commands - .long 50f-10f // size of all load commands - .long MAC_NOUNDEFS // flags - .long 0 // reserved -10: .long MAC_LC_SEGMENT_64 - .long 20f-10b // unmaps first page dir - .ascin "__PAGEZERO",16 // consistent with linux - .quad 0,0x200000,0,0 // which forbids mem <2m - .long 0,0,0,0 -20: .long MAC_LC_SEGMENT_64 - .long 30f-20b - .ascin "__TEXT",16 - .quad ehdr // vaddr - .quad filesz // memsz - .quad 0 // file offset - .quad filesz // file size - .long PROT_EXEC|PROT_READ|PROT_WRITE // maxprot - .long PROT_EXEC|PROT_READ // initprot - .long 0 // segment section count - .long 0 // flags -30: .long MAC_LC_UUID - .long 40f-30b - .quad 0x3fb29ee4ac6c87aa // uuid1 - .quad 0xdd2c9bb866d9eef9 // uuid2 -40: .long MAC_LC_UNIXTHREAD - .long 50f-40b // cmdsize - .long MAC_THREAD_NEXGEN32E // flavaflav - .long (420f-410f)/4 // count -410: .quad 0 // rax - .quad 0 // rbx - .quad 0 // rcx - .quad 0 // rdx - .quad 0 // rdi - .quad 0 // rsi - .quad 0 // rbp - .quad 0 // rsp - .quad 0 // r8 - .quad 0 // r9 - .quad 0 // r10 - .quad 0 // r11 - .quad 0 // r12 - .quad 0 // r13 - .quad 0 // r14 - .quad 0 // r15 - .quad _apple // rip - .quad 0 // rflags - .quad 0 // cs - .quad 0 // fs - .quad 0 // gs -420: -50: - .endobj macho - - .balign 64,0 // for ape.S dd - .org 64*12 // for ape.S dd - - .text -_apple: mov $_HOSTXNU,%dl // xnu's not unix! -_start: mov %rsp,%rsi // save real stack - sub $1024*1024,%rsp // room for allocs - jmp ApeLoader - .endfn _start,globl - .endfn _apple,globl - -// Invokes system call. -// -// This function has eight parameters. The first seven are for -// arguments passed along to the system call. The eight is for -// the magic number that indicates which system call is called -// -// The return value follows the Linux kernel convention, where -// errors are returned as `-errno`. BSD systems are normalized -// to follow this convention automatically. -SystemCall: - mov %rcx,%r10 - mov 16(%rsp),%eax - clc - syscall - jnc 1f - neg %rax -1: ret - .endfn SystemCall,globl diff --git a/ape/loader-macho.S b/ape/loader-macho.S index 9adc82d42..7d27739ed 100644 --- a/ape/loader-macho.S +++ b/ape/loader-macho.S @@ -21,18 +21,19 @@ #include "libc/dce.h" #include "libc/macros.internal.h" -// APE Loader Executable Structure for XNU - - .section .head +// Apple Mach-O Executable Headers +// Fixups are applied by objbincopy.com +// There must exist a MAC_LC_SEGMENT_64 for every PT_LOAD + .section .macho,"a",@progbits .balign 64 -macho: .long 0xFEEDFACE+1 + .long 0xFEEDFACE+1 .long MAC_CPU_NEXGEN32E .long MAC_CPU_NEXGEN32E_ALL .long MAC_EXECUTE - .long 4 // number of load commands - .long 50f-10f // size of all load commands - .long MAC_NOUNDEFS // flags + .long 5 // number of load commands + .long 60f-10f // size of all load commands + .long MAC_NOUNDEFS|MAC_SPLIT_SEGS // flags .long 0 // reserved 10: .long MAC_LC_SEGMENT_64 .long 20f-10b // unmaps first page dir @@ -42,23 +43,34 @@ macho: .long 0xFEEDFACE+1 20: .long MAC_LC_SEGMENT_64 .long 30f-20b .ascin "__TEXT",16 - .quad macho // vaddr - .quad filesz // memsz + .quad 0 // vaddr + .quad 0 // memsz .quad 0 // file offset - .quad filesz // file size - .long PROT_EXEC|PROT_READ|PROT_WRITE // maxprot - .long PROT_EXEC|PROT_READ // initprot + .quad 0 // file size + .long 0 // maxprot + .long 0 // initprot .long 0 // segment section count .long 0 // flags -30: .long MAC_LC_UUID +30: .long MAC_LC_SEGMENT_64 .long 40f-30b - .quad 0x3fb29ee4ac6c87aa // uuid1 - .quad 0xdd2c9bb866d9eef8 // uuid2 -40: .long MAC_LC_UNIXTHREAD - .long 50f-40b // cmdsize + .ascin "__RODATA",16 + .quad 0 // vaddr + .quad 0 // memsz + .quad 0 // file offset + .quad 0 // file size + .long 0 // maxprot + .long 0 // initprot + .long 0 // segment section count + .long 0 // flags +40: .long MAC_LC_UUID + .long 50f-40b + .quad 0x4527148ba7a513ef // uuid1 + .quad 0x56fa865940665e8f // uuid2 +50: .long MAC_LC_UNIXTHREAD + .long 60f-50b // cmdsize .long MAC_THREAD_NEXGEN32E // flavaflav - .long (420f-410f)/4 // count -410: .quad 0 // rax + .long (520f-510f)/4 // count +510: .quad 0 // rax .quad 0 // rbx .quad 0 // rcx .quad 0 // rdx @@ -74,37 +86,10 @@ macho: .long 0xFEEDFACE+1 .quad 0 // r13 .quad 0 // r14 .quad 0 // r15 - .quad _start // rip + .quad XnuEntrypoint // rip .quad 0 // rflags .quad 0 // cs .quad 0 // fs .quad 0 // gs -420: -50: - .endobj macho,globl - - .balign 64 -_start: mov $_HOSTXNU,%dl // xnu's not unix! - mov %rsp,%rsi // save real stack - sub $1024*1024,%rsp // room for allocs - jmp ApeLoader - .endfn _start,globl - -// Invokes system call. -// -// This function has eight parameters. The first seven are for -// arguments passed along to the system call. The eight is for -// the magic number that indicates which system call is called -// -// The return value follows the Linux kernel convention, where -// errors are returned as `-errno`. BSD systems are normalized -// to follow this convention automatically. -SystemCall: - mov %rcx,%r10 - mov 16(%rsp),%eax - clc - syscall - jnc 1f - neg %rax -1: ret - .endfn SystemCall,globl +520: +60: diff --git a/ape/loader.c b/ape/loader.c index 5de00f11f..a92fcd3ca 100644 --- a/ape/loader.c +++ b/ape/loader.c @@ -77,9 +77,6 @@ * @note this can probably be used as a binfmt_misc interpreter */ -#define PAGE_SIZE 4096 -#define NULL_PAGE 2097152 - #define LINUX 1 #define XNU 8 #define OPENBSD 16 @@ -102,6 +99,7 @@ #define IsNetbsd() (SupportsNetbsd() && os == NETBSD) #define O_RDONLY 0 +#define PROT_NONE 0 #define PROT_READ 1 #define PROT_WRITE 2 #define PROT_EXEC 4 @@ -109,10 +107,12 @@ #define MAP_PRIVATE 2 #define MAP_FIXED 16 #define MAP_ANONYMOUS (IsLinux() ? 32 : 4096) +#define MAP_NORESERVE (IsLinux() ? 16384 : 0) #define ELFCLASS32 1 #define ELFDATA2LSB 1 #define EM_NEXGEN32E 62 #define ET_EXEC 2 +#define ET_DYN 3 #define PT_LOAD 1 #define PT_DYNAMIC 2 #define PT_INTERP 3 @@ -124,6 +124,7 @@ #define AT_PHDR 3 #define AT_PHENT 4 #define AT_PHNUM 5 +#define AT_PAGESZ 6 #define AT_EXECFN_LINUX 31 #define AT_EXECFN_NETBSD 2014 #define X_OK 1 @@ -132,11 +133,11 @@ #define PR_SET_MM 35 #define PR_SET_MM_EXE_FILE 13 -#define Read32(S) \ +#define READ32(S) \ ((unsigned)(255 & (S)[3]) << 030 | (unsigned)(255 & (S)[2]) << 020 | \ (unsigned)(255 & (S)[1]) << 010 | (unsigned)(255 & (S)[0]) << 000) -#define Read64(S) \ +#define READ64(S) \ ((unsigned long)(255 & (S)[7]) << 070 | \ (unsigned long)(255 & (S)[6]) << 060 | \ (unsigned long)(255 & (S)[5]) << 050 | \ @@ -146,6 +147,13 @@ (unsigned long)(255 & (S)[1]) << 010 | \ (unsigned long)(255 & (S)[0]) << 000) +#define DEBUG(VAR) \ + { \ + char ibuf[19] = {0}; \ + Utox(ibuf, VAR); \ + Print(os, 2, #VAR " ", ibuf, "\n", 0l); \ + } + struct ElfEhdr { unsigned char e_ident[16]; unsigned short e_type; @@ -176,7 +184,7 @@ struct ElfPhdr { union ElfEhdrBuf { struct ElfEhdr ehdr; - char buf[4096]; + char buf[8192]; }; union ElfPhdrBuf { @@ -199,8 +207,8 @@ struct ApeLoader { char path[1024]; }; -long SystemCall(long arg1, long arg2, long arg3, long arg4, long arg5, - long arg6, long arg7, long magi); +long SystemCall(long, long, long, long, long, long, long, int); +void Launch(void *, long, void *, int) __attribute__((__noreturn__)); extern char __executable_start[]; extern char _end[]; @@ -215,6 +223,27 @@ static unsigned long StrLen(const char *s) { return n; } +static int StrCmp(const char *l, const char *r) { + unsigned long i = 0; + while (l[i] == r[i] && r[i]) ++i; + return (l[i] & 255) - (r[i] & 255); +} + +static void Bzero(void *a, unsigned long n) { + long z; + char *p, *e; + p = (char *)a; + e = p + n; + z = 0; + while (p + sizeof(z) <= e) { + __builtin_memcpy(p, &z, sizeof(z)); + p += sizeof(z); + } + while (p < e) { + *p++ = 0; + } +} + static const char *MemChr(const char *s, unsigned char c, unsigned long n) { for (; n; --n, ++s) { if ((*s & 255) == c) { @@ -225,15 +254,29 @@ static const char *MemChr(const char *s, unsigned char c, unsigned long n) { } static void *MemMove(void *a, const void *b, unsigned long n) { - char *d = a; + long w; + char *d; + const char *s; unsigned long i; - const char *s = b; + d = (char *)a; + s = (const char *)b; if (d > s) { - for (i = n; i--;) { - d[i] = s[i]; + while (n >= sizeof(w)) { + n -= sizeof(w); + __builtin_memcpy(&w, s + n, sizeof(n)); + __builtin_memcpy(d + n, &w, sizeof(n)); + } + while (n--) { + d[n] = s[n]; } } else { - for (i = 0; i < n; ++i) { + i = 0; + while (i + sizeof(w) <= n) { + __builtin_memcpy(&w, s + i, sizeof(i)); + __builtin_memcpy(d + i, &w, sizeof(i)); + i += sizeof(w); + } + for (; i < n; ++i) { d[i] = s[i]; } } @@ -260,6 +303,23 @@ static char *GetEnv(char **p, const char *s) { return 0; } +static char *Utox(char p[19], unsigned long x) { + int i; + if (x) { + *p++ = '0'; + *p++ = 'x'; + i = (__builtin_clzl(x) ^ (sizeof(long) * 8 - 1)) + 1; + i = (i + 3) & -4; + do { + *p++ = "0123456789abcdef"[(x >> (i -= 4)) & 15]; + } while (i); + } else { + *p++ = '0'; + } + *p = 0; + return p; +} + static char *Utoa(char p[21], unsigned long x) { char t; unsigned long i, a, b; @@ -284,15 +344,22 @@ static char *Itoa(char p[21], long x) { return Utoa(p, x); } -__attribute__((__noreturn__)) static void Exit(int rc, int os) { - SystemCall(rc, 0, 0, 0, 0, 0, 0, - (IsLinux() ? 60 : 1) | (IsXnu() ? 0x2000000 : 0)); +__attribute__((__noinline__)) static long CallSystem(long arg1, long arg2, + long arg3, long arg4, + long arg5, long arg6, + long arg7, int numba, + char os) { + if (IsXnu()) numba |= 0x2000000; + return SystemCall(arg1, arg2, arg3, arg4, arg5, arg6, arg7, numba); +} + +__attribute__((__noreturn__)) static void Exit(long rc, int os) { + CallSystem(rc, 0, 0, 0, 0, 0, 0, IsLinux() ? 60 : 1, os); __builtin_unreachable(); } static int Close(int fd, int os) { - return SystemCall(fd, 0, 0, 0, 0, 0, 0, - (IsLinux() ? 3 : 6) | (IsXnu() ? 0x2000000 : 0)); + return CallSystem(fd, 0, 0, 0, 0, 0, 0, IsLinux() ? 3 : 6, os); } static long Pread(int fd, void *data, unsigned long size, long off, int os) { @@ -314,18 +381,15 @@ static long Pread(int fd, void *data, unsigned long size, long off, int os) { } static long Write(int fd, const void *data, unsigned long size, int os) { - return SystemCall(fd, (long)data, size, 0, 0, 0, 0, - (IsLinux() ? 1 : 4) | (IsXnu() ? 0x2000000 : 0)); + return CallSystem(fd, (long)data, size, 0, 0, 0, 0, IsLinux() ? 1 : 4, os); } static int Execve(const char *prog, char **argv, char **envp, int os) { - return SystemCall((long)prog, (long)argv, (long)envp, 0, 0, 0, 0, - 59 | (IsXnu() ? 0x2000000 : 0)); + return CallSystem((long)prog, (long)argv, (long)envp, 0, 0, 0, 0, 59, os); } static int Access(const char *path, int mode, int os) { - return SystemCall((long)path, mode, 0, 0, 0, 0, 0, - (IsLinux() ? 21 : 33) | (IsXnu() ? 0x2000000 : 0)); + return CallSystem((long)path, mode, 0, 0, 0, 0, 0, IsLinux() ? 21 : 33, os); } static int Msyscall(long p, unsigned long n, int os) { @@ -337,13 +401,12 @@ static int Msyscall(long p, unsigned long n, int os) { } static int Open(const char *path, int flags, int mode, int os) { - return SystemCall((long)path, flags, mode, 0, 0, 0, 0, - (IsLinux() ? 2 : 5) | (IsXnu() ? 0x2000000 : 0)); + return CallSystem((long)path, flags, mode, 0, 0, 0, 0, IsLinux() ? 2 : 5, os); } static int Mprotect(void *addr, unsigned long size, int prot, int os) { - return SystemCall((long)addr, size, prot, 0, 0, 0, 0, - (IsLinux() ? 10 : 74) | (IsXnu() ? 0x2000000 : 0)); + return CallSystem((long)addr, size, prot, 0, 0, 0, 0, IsLinux() ? 10 : 74, + os); } static long Mmap(void *addr, unsigned long size, int prot, int flags, int fd, @@ -396,21 +459,6 @@ __attribute__((__noreturn__)) static void Pexit(int os, const char *c, int rc, Exit(127, os); } -static int StrCmp(const char *l, const char *r) { - unsigned long i = 0; - while (l[i] == r[i] && r[i]) ++i; - return (l[i] & 255) - (r[i] & 255); -} - -static void *MemSet(void *a, int c, unsigned long n) { - char *d = a; - unsigned long i; - for (i = 0; i < n; ++i) { - d[i] = c; - } - return d; -} - static char EndsWithIgnoreCase(const char *p, unsigned long n, const char *s) { unsigned long i, m; if (n >= (m = StrLen(s))) { @@ -466,8 +514,6 @@ static char FindCommand(struct PathSearcher *ps, const char *suffix) { MemChr(ps->name, '\\', ps->namelen)) { ps->path[0] = 0; return AccessCommand(ps, suffix, 0); - } else { - if (AccessCommand(ps, suffix, 0)) return 1; } return SearchPath(ps, suffix); } @@ -486,7 +532,8 @@ static char *Commandv(struct PathSearcher *ps, int os, const char *name, } __attribute__((__noreturn__)) static void Spawn(int os, const char *exe, int fd, - long *sp, struct ElfEhdr *e, + long *sp, unsigned long pagesz, + struct ElfEhdr *e, struct ElfPhdr *p) { long rc; int prot; @@ -494,21 +541,26 @@ __attribute__((__noreturn__)) static void Spawn(int os, const char *exe, int fd, int found_code; int found_entry; long code, codesize; + unsigned long dynbase; + unsigned long virtmin, virtmax; unsigned long a, b, c, d, i, j; - /* load elf */ + /* validate elf */ code = 0; codesize = 0; found_code = 0; found_entry = 0; + virtmin = virtmax = 0; + if (!pagesz) pagesz = 4096; + if (pagesz & (pagesz - 1)) { + Pexit(os, exe, 0, "AT_PAGESZ isn't two power"); + } for (i = 0; i < e->e_phnum; ++i) { - - /* validate program header */ if (p[i].p_type == PT_INTERP) { - Pexit(os, exe, 0, "ELF has PT_INTERP which is unsupported"); + Pexit(os, exe, 0, "ELF has PT_INTERP which isn't supported"); } if (p[i].p_type == PT_DYNAMIC) { - Pexit(os, exe, 0, "ELF has PT_DYNAMIC which is unsupported"); + Pexit(os, exe, 0, "ELF has PT_DYNAMIC which isn't supported"); } if (p[i].p_type != PT_LOAD) { continue; @@ -517,48 +569,34 @@ __attribute__((__noreturn__)) static void Spawn(int os, const char *exe, int fd, continue; } if (p[i].p_filesz > p[i].p_memsz) { - Pexit(os, exe, 0, "ELF phdr filesz exceeds memsz"); + Pexit(os, exe, 0, "ELF p_filesz exceeds p_memsz"); } - if ((p[i].p_vaddr & (PAGE_SIZE - 1)) != (p[i].p_offset & (PAGE_SIZE - 1))) { - Pexit(os, exe, 0, "ELF phdr virt/off skew mismatch w.r.t. pagesize"); + if ((p[i].p_vaddr & (pagesz - 1)) != (p[i].p_offset & (pagesz - 1))) { + Pexit(os, exe, 0, "ELF p_vaddr incongruent w/ p_offset modulo AT_PAGESZ"); } if (p[i].p_vaddr + p[i].p_memsz < p[i].p_vaddr || - p[i].p_vaddr + p[i].p_memsz + (PAGE_SIZE - 1) < p[i].p_vaddr) { - Pexit(os, exe, 0, "ELF phdr vaddr+memsz overflow"); + p[i].p_vaddr + p[i].p_memsz + (pagesz - 1) < p[i].p_vaddr) { + Pexit(os, exe, 0, "ELF p_vaddr + p_memsz overflow"); } if (p[i].p_vaddr + p[i].p_filesz < p[i].p_vaddr || - p[i].p_vaddr + p[i].p_filesz + (PAGE_SIZE - 1) < p[i].p_vaddr) { - Pexit(os, exe, 0, "ELF phdr vaddr+files overflow"); - } - a = p[i].p_vaddr & -PAGE_SIZE; - b = (p[i].p_vaddr + p[i].p_memsz + (PAGE_SIZE - 1)) & -PAGE_SIZE; - if (MAX(a, 0) < MIN(b, NULL_PAGE)) { - Pexit(os, exe, 0, "ELF overlaps NULL page"); + p[i].p_vaddr + p[i].p_filesz + (pagesz - 1) < p[i].p_vaddr) { + Pexit(os, exe, 0, "ELF p_vaddr + p_filesz overflow"); } + a = p[i].p_vaddr & -pagesz; + b = (p[i].p_vaddr + p[i].p_memsz + (pagesz - 1)) & -pagesz; if (MAX(a, (unsigned long)__executable_start) < MIN(b, (unsigned long)_end)) { - Pexit(os, exe, 0, "ELF overlaps your APE loader"); + Pexit(os, exe, 0, "ELF segments overlap your APE loader"); } for (j = i + 1; j < e->e_phnum; ++j) { if (p[j].p_type != PT_LOAD) continue; - c = p[j].p_vaddr & -PAGE_SIZE; - d = (p[j].p_vaddr + p[j].p_memsz + (PAGE_SIZE - 1)) & -PAGE_SIZE; + c = p[j].p_vaddr & -pagesz; + d = (p[j].p_vaddr + p[j].p_memsz + (pagesz - 1)) & -pagesz; if (MAX(a, c) < MIN(b, d)) { - Pexit(os, exe, 0, "ELF overlaps its own vaspace"); + Pexit(os, exe, 0, "ELF segments overlap each others virtual memory"); } } - - /* configure mapping */ - prot = 0; - flags = MAP_FIXED | MAP_PRIVATE; - if (p[i].p_flags & PF_R) { - prot |= PROT_READ; - } - if (p[i].p_flags & PF_W) { - prot |= PROT_WRITE; - } if (p[i].p_flags & PF_X) { - prot |= PROT_EXEC; if (!found_code) { code = p[i].p_vaddr; codesize = p[i].p_filesz; @@ -568,30 +606,65 @@ __attribute__((__noreturn__)) static void Spawn(int os, const char *exe, int fd, found_entry = 1; } } + if (p[i].p_vaddr < virtmin) { + virtmin = p[i].p_vaddr; + } + if (p[i].p_vaddr + p[i].p_memsz > virtmax) { + virtmax = p[i].p_vaddr + p[i].p_memsz; + } + } + if (!found_entry) { + Pexit(os, exe, 0, "ELF entrypoint not found in PT_LOAD with PF_X"); + } + + /* choose loading address for dynamic elf executables + that maintains relative distances between segments */ + if (e->e_type == ET_DYN) { + rc = Mmap(0, virtmax - virtmin, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0, os); + if (rc < 0) Pexit(os, exe, rc, "pie mmap"); + dynbase = rc; + if (dynbase & (pagesz - 1)) { + Pexit(os, exe, 0, "OS mmap incongruent w/ AT_PAGESZ"); + } + if (dynbase + virtmin < dynbase) { + Pexit(os, exe, 0, "ELF dynamic base overflow"); + } + } else { + dynbase = 0; + } + + /* load elf */ + for (i = 0; i < e->e_phnum; ++i) { + if (p[i].p_type != PT_LOAD) continue; + if (!p[i].p_memsz) continue; + + /* configure mapping */ + prot = 0; + flags = MAP_FIXED | MAP_PRIVATE; + if (p[i].p_flags & PF_R) prot |= PROT_READ; + if (p[i].p_flags & PF_W) prot |= PROT_WRITE; + if (p[i].p_flags & PF_X) prot |= PROT_EXEC; /* load from file */ if (p[i].p_filesz) { void *addr; + int prot1, prot2; unsigned long size; - int dirty, prot1, prot2; - dirty = 0; prot1 = prot; prot2 = prot; - a = p[i].p_vaddr + p[i].p_filesz; - b = (a + (PAGE_SIZE - 1)) & -PAGE_SIZE; - c = p[i].p_vaddr + p[i].p_memsz; + a = p[i].p_vaddr + p[i].p_filesz; /* end of file content */ + b = (a + (pagesz - 1)) & -pagesz; /* first pure bss page */ + c = p[i].p_vaddr + p[i].p_memsz; /* end of segment data */ if (b > c) b = c; - if (c > b) { - dirty = 1; - if (~prot1 & PROT_WRITE) { - prot1 = PROT_READ | PROT_WRITE; - } + if (c > b && (~prot1 & PROT_WRITE)) { + prot1 = PROT_READ | PROT_WRITE; } - addr = (void *)(p[i].p_vaddr & -PAGE_SIZE); - size = (p[i].p_vaddr & (PAGE_SIZE - 1)) + p[i].p_filesz; - rc = Mmap(addr, size, prot1, flags, fd, p[i].p_offset & -PAGE_SIZE, os); + addr = (void *)(dynbase + (p[i].p_vaddr & -pagesz)); + size = (p[i].p_vaddr & (pagesz - 1)) + p[i].p_filesz; + rc = Mmap(addr, size, prot1, flags, fd, p[i].p_offset & -pagesz, os); if (rc < 0) Pexit(os, exe, rc, "prog mmap"); - if (dirty) MemSet((void *)a, 0, b - a); + if (c > b) Bzero((void *)(dynbase + a), b - a); if (prot2 != prot1) { rc = Mprotect(addr, size, prot2, os); if (rc < 0) Pexit(os, exe, rc, "prog mprotect"); @@ -600,83 +673,100 @@ __attribute__((__noreturn__)) static void Spawn(int os, const char *exe, int fd, /* allocate extra bss */ a = p[i].p_vaddr + p[i].p_filesz; - a = (a + (PAGE_SIZE - 1)) & -PAGE_SIZE; + a = (a + (pagesz - 1)) & -pagesz; b = p[i].p_vaddr + p[i].p_memsz; if (b > a) { - rc = Mmap((void *)a, b - a, prot, flags | MAP_ANONYMOUS, 0, 0, os); + flags |= MAP_ANONYMOUS; + rc = Mmap((void *)(dynbase + a), b - a, prot, flags, -1, 0, os); if (rc < 0) Pexit(os, exe, rc, "bss mmap"); } } /* finish up */ - if (!found_entry) { - Pexit(os, exe, 0, "ELF entrypoint not found in PT_LOAD with PF_X"); - } Close(fd, os); - Msyscall(code, codesize, os); + Msyscall(dynbase + code, codesize, os); - /* we clear all the general registers we can to have some wiggle room - to extend the behavior of this loader in the future. we don't need - to clear the xmm registers since the ape loader should be compiled - with the -mgeneral-regs-only flag. */ - asm volatile("xor\t%%eax,%%eax\n\t" - "xor\t%%r8d,%%r8d\n\t" - "xor\t%%r9d,%%r9d\n\t" - "xor\t%%r10d,%%r10d\n\t" - "xor\t%%r11d,%%r11d\n\t" - "xor\t%%ebx,%%ebx\n\t" /* netbsd doesnt't clear this */ - "xor\t%%r12d,%%r12d\n\t" /* netbsd doesnt't clear this */ - "xor\t%%r13d,%%r13d\n\t" /* netbsd doesnt't clear this */ - "xor\t%%r14d,%%r14d\n\t" /* netbsd doesnt't clear this */ - "xor\t%%r15d,%%r15d\n\t" /* netbsd doesnt't clear this */ - "mov\t%%rdx,%%rsp\n\t" - "xor\t%%edx,%%edx\n\t" - "push\t%%rsi\n\t" - "xor\t%%esi,%%esi\n\t" - "xor\t%%ebp,%%ebp\n\t" - "ret" - : /* no outputs */ - : "D"(IsFreebsd() ? sp : 0), "S"(e->e_entry), "d"(sp), "c"(os) - : "memory"); - __builtin_unreachable(); + /* call program entrypoint */ + Launch(IsFreebsd() ? sp : 0, dynbase + e->e_entry, sp, os); } -static void TryElf(struct ApeLoader *M, const char *exe, int fd, long *sp, - long *auxv, int os) { - unsigned size = M->ehdr.ehdr.e_phnum; - if (Read32(M->ehdr.buf) == Read32("\177ELF") && - M->ehdr.ehdr.e_type == ET_EXEC && - M->ehdr.ehdr.e_machine == EM_NEXGEN32E && - M->ehdr.ehdr.e_ident[EI_CLASS] != ELFCLASS32 && - M->ehdr.ehdr.e_phentsize >= sizeof(M->phdr.phdr) && - (size *= M->ehdr.ehdr.e_phentsize) <= sizeof(M->phdr.buf) && - Pread(fd, M->phdr.buf, size, M->ehdr.ehdr.e_phoff, os) == size) { - for (; *auxv; auxv += 2) { - switch (*auxv) { - case AT_PHDR: - auxv[1] = (unsigned long)&M->phdr; - break; - case AT_PHENT: - auxv[1] = M->ehdr.ehdr.e_phentsize; - break; - case AT_PHNUM: - auxv[1] = M->ehdr.ehdr.e_phnum; - break; - default: - break; - } - } - Spawn(os, exe, fd, sp, &M->ehdr.ehdr, &M->phdr.phdr); +static const char *TryElf(struct ApeLoader *M, const char *exe, int fd, + long *sp, long *auxv, unsigned long pagesz, int os) { + long rc; + unsigned size; + if (READ32(M->ehdr.buf) != READ32("\177ELF")) { + return "didn't embed ELF magic"; } + if (M->ehdr.ehdr.e_ident[EI_CLASS] == ELFCLASS32) { + return "32-bit ELF isn't supported"; + } + if (M->ehdr.ehdr.e_type != ET_EXEC && M->ehdr.ehdr.e_type != ET_DYN) { + return "ELF not ET_EXEC or ET_DYN"; + } + if (M->ehdr.ehdr.e_machine != EM_NEXGEN32E) { + return "couldn't find ELF header with x86-64 machine type"; + } + if (M->ehdr.ehdr.e_phentsize < sizeof(M->phdr.phdr)) { + return "e_phentsize is too small"; + } + size = M->ehdr.ehdr.e_phnum; + if ((size *= M->ehdr.ehdr.e_phentsize) > sizeof(M->phdr.buf)) { + return "too many ELF program headers"; + } + rc = Pread(fd, M->phdr.buf, size, M->ehdr.ehdr.e_phoff, os); + if (rc < 0) return "failed to read ELF program headers"; + if (rc != size) return "truncated read of ELF program headers"; + for (; *auxv; auxv += 2) { + switch (*auxv) { + case AT_PHDR: + auxv[1] = (unsigned long)&M->phdr; + break; + case AT_PHENT: + auxv[1] = M->ehdr.ehdr.e_phentsize; + break; + case AT_PHNUM: + auxv[1] = M->ehdr.ehdr.e_phnum; + break; + default: + break; + } + } + Spawn(os, exe, fd, sp, pagesz, &M->ehdr.ehdr, &M->phdr.phdr); +} + +static __attribute__((__noreturn__)) void ShowUsage(int os, int fd, int rc) { + Print(os, fd, + "NAME\n" + "\n" + " actually portable executable loader v1.3\n" + " copyright 2023 justine alexandra roberts tunney\n" + " https://justine.lol/ape.html\n" + "\n" + "USAGE\n" + "\n" + " ape [FLAGS] PROG [ARGV1,ARGV2,...]\n" + " ape [FLAGS] - PROG [ARGV0,ARGV1,...]\n" + "\n" + "FLAGS\n" + "\n" + " -h show this help\n" + " -f force loading of program (do not use execve)\n" + "\n", + 0l); + Exit(rc, os); } __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) { int rc; unsigned i, n; + int usetheforce; int c, fd, os, argc; struct ApeLoader *M; + unsigned long pagesz; long *auxv, *ap, *ew; - char *p, *exe, *prog, **argv, **envp; + char *p, *pe, *exe, *ape, *prog, **argv, **envp; + + (void)Utox; /* detect freebsd */ if (SupportsXnu() && dl == XNU) { @@ -699,14 +789,21 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) { } } + /* determine ape loader program name */ + ape = argv[0]; + if (!ape) ape = "ape"; + /* detect openbsd */ if (SupportsOpenbsd() && !os && !auxv[0]) { os = OPENBSD; } /* detect netbsd and find end of words */ + pagesz = 0; for (ap = auxv; ap[0]; ap += 2) { - if (SupportsNetbsd() && !os && ap[0] == AT_EXECFN_NETBSD) { + if (ap[0] == AT_PAGESZ) { + pagesz = ap[1]; + } else if (SupportsNetbsd() && !os && ap[0] == AT_EXECFN_NETBSD) { os = NETBSD; } } @@ -723,6 +820,23 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) { os = LINUX; } + /* parse flags */ + usetheforce = 0; + while (argc > 1) { + if (argv[1][0] != '-') break; /* normal argument */ + if (!argv[1][1]) break; /* hyphen argument */ + if (!StrCmp(argv[1], "-h") || !StrCmp(argv[1], "--help")) { + ShowUsage(os, 1, 0); + } else if (!StrCmp(argv[1], "-f")) { + usetheforce = 1; + } else { + Print(os, 2, ape, ": invalid flag (pass -h for help)\n", 0l); + Exit(1, os); + } + *++sp = --argc; + ++argv; + } + /* we can load via shell, shebang, or binfmt_misc */ if (argc >= 3 && !StrCmp(argv[1], "-")) { /* if the first argument is a hyphen then we give the user the @@ -733,13 +847,7 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) { argc = sp[3] = sp[0] - 3; argv = (char **)((sp += 3) + 1); } else if (argc < 2) { - Print(os, 2, - "usage: ape PROG [ARGV1,ARGV2,...]\n" - " ape - PROG [ARGV0,ARGV1,...]\n" - "αcτµαlly pδrταblε εxεcµταblε loader v1.2\n" - "copyright 2022 justine alexandra roberts tunney\n" - "https://justine.lol/ape.html\n", - 0l); + Print(os, 2, ape, ": missing command name (pass -h for help)\n", 0l); Exit(1, os); } else { prog = (char *)sp[2]; @@ -749,14 +857,15 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) { /* resolve path of executable and read its first page */ if (!(exe = Commandv(&M->ps, os, prog, GetEnv(envp, "PATH")))) { - Pexit(os, prog, 0, "not found (maybe chmod +x)"); + Pexit(os, prog, 0, "not found (maybe chmod +x or ./ needed)"); } else if ((fd = Open(exe, O_RDONLY, 0, os)) < 0) { Pexit(os, exe, fd, "open"); } else if ((rc = Pread(fd, M->ehdr.buf, sizeof(M->ehdr.buf), 0, os)) < 0) { Pexit(os, exe, rc, "read"); - } else if (rc != sizeof(M->ehdr.buf)) { + } else if ((unsigned long)rc < sizeof(M->ehdr.ehdr)) { Pexit(os, exe, 0, "too small"); } + pe = M->ehdr.buf + rc; /* change argv[0] to resolved path if it's ambiguous */ if (argc > 0 && *prog != '/' && *exe == '/' && !StrCmp(prog, argv[0])) { @@ -767,21 +876,24 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) { 1. if file is a native executable, try to run it natively 2. if ape, will scan shell script for elf printf statements 3. shell script may have multiple lines producing elf headers - 4. all elf printf lines must exist in the first 4096 bytes of file + 4. all elf printf lines must exist in the first 8192 bytes of file 5. elf program headers may appear anywhere in the binary */ - if ((IsXnu() && Read32(M->ehdr.buf) == 0xFEEDFACE + 1) || - (!IsXnu() && Read32(M->ehdr.buf) == Read32("\177ELF"))) { + if (!usetheforce && + ((IsXnu() && READ32(M->ehdr.buf) == 0xFEEDFACE + 1) || + (!IsXnu() && READ32(M->ehdr.buf) == READ32("\177ELF")))) { Close(fd, os); Execve(exe, argv, envp, os); + if ((fd = Open(exe, O_RDONLY, 0, os)) < 0) { + Pexit(os, exe, rc, "execve and open failed"); + } } - if (Read64(M->ehdr.buf) == Read64("MZqFpD='") || - Read64(M->ehdr.buf) == Read64("jartsr='")) { - for (p = M->ehdr.buf; p < M->ehdr.buf + sizeof(M->ehdr.buf); ++p) { - if (Read64(p) != Read64("printf '")) { + if (READ64(M->ehdr.buf) == READ64("MZqFpD='") || + READ64(M->ehdr.buf) == READ64("jartsr='")) { + for (p = M->ehdr.buf; p < pe; ++p) { + if (READ64(p) != READ64("printf '")) { continue; } - for (i = 0, p += 8; - p + 3 < M->ehdr.buf + sizeof(M->ehdr.buf) && (c = *p++) != '\'';) { + for (i = 0, p += 8; p + 3 < pe && (c = *p++) != '\'';) { if (c == '\\') { if ('0' <= *p && *p <= '7') { c = *p++ - '0'; @@ -796,12 +908,14 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) { } } M->ehdr.buf[i++] = c; + if (i >= sizeof(M->ehdr.buf)) { + break; + } } if (i >= sizeof(M->ehdr.ehdr)) { - TryElf(M, exe, fd, sp, auxv, os); + TryElf(M, exe, fd, sp, auxv, pagesz, os); } } } - TryElf(M, exe, fd, sp, auxv, os); - Pexit(os, exe, 0, "Not an acceptable APE/ELF executable for x86-64"); + Pexit(os, exe, 0, TryElf(M, exe, fd, sp, auxv, pagesz, os)); } diff --git a/ape/loader.lds b/ape/loader.lds index 7507beec2..f01d07eae 100644 --- a/ape/loader.lds +++ b/ape/loader.lds @@ -1,7 +1,7 @@ /*-*- mode: ld-script; indent-tabs-mode: nil; tab-width: 2; coding: utf-8 -*-│ │vi: set et sts=2 tw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,26 +16,80 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -ENTRY(_start) + +ENTRY(ElfEntrypoint) + +PHDRS { + text PT_LOAD FLAGS(1) FILEHDR PHDRS; /* PF_X */ + rodata PT_LOAD FLAGS(4); /* PF_R */ + stack PT_GNU_STACK FLAGS(6); /* PF_W|PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ +} SECTIONS { - . = 0x7fff0000; + + . = SEGMENT_START("text-segment", 0x7f000000); __executable_start = .; - .rodata : { - KEEP(*(.head)) - *(.rodata .rodata.*) - . = ALIGN(4096); - } + . += SIZEOF_HEADERS; + + .macho : { + KEEP(*(.macho)) + } :text + + .note : { + KEEP(*(.note)) + } :text :note + .text : { - text = .; - *(.text) - } + *(.text .text.* .gnu.linkonce.t.*) + } :text + + .rodata ALIGN(CONSTANT(COMMONPAGESIZE)) : { + *(.rodata .rodata.* .gnu.linkonce.r.*) + } :rodata + + .stack : { + *(.stack) + } :stack + _end = .; + + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .gnu.build.attributes : { *(.gnu.build.attributes .gnu.build.attributes.*) } + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .debug_macro 0 : { *(.debug_macro) } + .debug_addr 0 : { *(.debug_addr) } + .debug_loclists 0 : { *(.debug_loclists) } + .debug_rnglists 0 : { *(.debug_rnglists) } + .debug_line_str 0 : { *(.debug_line_str) } + .debug_extra 0 : { *(.debug_line_str) } + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .gnu.attributes 0 : { KEEP(*(.gnu.attributes)) } + /DISCARD/ : { *(.*) } } - -textsz = SIZEOF(.text); -rosize = SIZEOF(.rodata); -filesz = SIZEOF(.rodata) + SIZEOF(.text); diff --git a/ape/start.S b/ape/start.S new file mode 100644 index 000000000..0c2e5ee80 --- /dev/null +++ b/ape/start.S @@ -0,0 +1,51 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" +#include "libc/macros.internal.h" + +XnuEntrypoint: + mov $_HOSTXNU,%dl // xnu's not unix! +ElfEntrypoint: + mov %rsp,%rsi // save real stack + sub $1024*1024,%rsp // room for allocs + call ApeLoader + .endfn ElfEntrypoint,globl + .endfn XnuEntrypoint,globl + + .section .note,"a",@progbits + .balign 4 +openbsd.ident: + .long 2f-1f + .long 4f-3f + .long 1 +1: .asciz "OpenBSD" +2: .balign 4 +3: .long 0 +4: .size openbsd.ident,.-openbsd.ident + .type openbsd.ident,@object + .balign 4 +netbsd.ident: + .long 2f-1f + .long 4f-3f + .long 1 +1: .asciz "NetBSD" +2: .balign 4 +3: .long 901000000 +4: .size netbsd.ident,.-netbsd.ident + .type netbsd.ident,@object diff --git a/ape/systemcall.S b/ape/systemcall.S new file mode 100644 index 000000000..22652d800 --- /dev/null +++ b/ape/systemcall.S @@ -0,0 +1,38 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.internal.h" + +// Invokes system call. +// +// This function has eight parameters. The first seven are for +// arguments passed along to the system call. The eight is for +// the magic number that indicates which system call is called +// +// The return value follows the Linux kernel convention, where +// errors are returned as `-errno`. BSD systems are normalized +// to follow this convention automatically. +SystemCall: + mov %rcx,%r10 + mov 16(%rsp),%eax + clc + syscall + jnc 1f + neg %rax +1: ret + .endfn SystemCall,globl diff --git a/build/bootstrap/ape.elf b/build/bootstrap/ape.elf index 34bde40e6..d95feed05 100755 Binary files a/build/bootstrap/ape.elf and b/build/bootstrap/ape.elf differ diff --git a/build/bootstrap/ape.macho b/build/bootstrap/ape.macho index 41d4aa21d..5ab2342f9 100755 Binary files a/build/bootstrap/ape.macho and b/build/bootstrap/ape.macho differ diff --git a/build/bootstrap/objbincopy.com b/build/bootstrap/objbincopy.com new file mode 100755 index 000000000..7b995000f Binary files /dev/null and b/build/bootstrap/objbincopy.com differ diff --git a/build/definitions.mk b/build/definitions.mk index ffd9dba3c..e357fca83 100644 --- a/build/definitions.mk +++ b/build/definitions.mk @@ -83,7 +83,7 @@ COMMA := , PWD := $(shell build/bootstrap/pwd.com) IGNORE := $(shell $(ECHO) -2 ♥cosmo) -IGNORE := $(shell $(MKDIR) o/tmp) +IGNORE := $(shell $(MKDIR) $(TMPDIR)) ifneq ($(findstring aarch64,$(MODE)),) ARCH = aarch64 @@ -237,6 +237,7 @@ DEFAULT_ASFLAGS = \ DEFAULT_LDFLAGS = \ -static \ -nostdlib \ + -znorelro \ --gc-sections \ --build-id=none \ --no-dynamic-linker diff --git a/libc/calls/execve-sysv.c b/libc/calls/execve-sysv.c index 095a23161..186f66d4c 100644 --- a/libc/calls/execve-sysv.c +++ b/libc/calls/execve-sysv.c @@ -85,8 +85,8 @@ int sys_execve(const char *prog, char *const argv[], char *const envp[]) { (CanExecute((ape = "/usr/bin/ape")) || CanExecute((ape = Join(firstnonnull(getenv("TMPDIR"), firstnonnull(getenv("HOME"), ".")), - ".ape-1.1", buf))) || - CanExecute((ape = Join(firstnonnull(getenv("HOME"), "."), ".ape-1.1", + ".ape-1.3", buf))) || + CanExecute((ape = Join(firstnonnull(getenv("HOME"), "."), ".ape-1.3", buf))))) { shargs[0] = ape; shargs[1] = "-"; diff --git a/tool/build/assimilate.c b/tool/build/assimilate.c index 1b32f1798..02ca25ef4 100644 --- a/tool/build/assimilate.c +++ b/tool/build/assimilate.c @@ -164,9 +164,9 @@ void GetMachoPayload(const char *image, size_t imagesize, int *out_offset, } DCHECK_EQ(REG_OK, regcomp(&rx, - "bs=([ [:digit:]]+) " - "skip=\"?([ [:digit:]]+)\"? " - "count=\"?([ [:digit:]]+)\"?", + "bs=\"?\\$?(*([ [:digit:]]+))*\"? " + "skip=\"?\\$?(*([ [:digit:]]+))*\"? " + "count=\"?\\$?(*([ [:digit:]]+))*\"?", REG_EXTENDED)); rc = regexec(&rx, script, 4, rm, 0); if (rc != REG_OK) { diff --git a/tool/build/objbincopy.c b/tool/build/objbincopy.c new file mode 100644 index 000000000..46f054bcb --- /dev/null +++ b/tool/build/objbincopy.c @@ -0,0 +1,432 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2023 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/calls.h" +#include "libc/elf/def.h" +#include "libc/elf/elf.h" +#include "libc/elf/struct/ehdr.h" +#include "libc/intrin/kprintf.h" +#include "libc/macho.internal.h" +#include "libc/macros.internal.h" +#include "libc/runtime/runtime.h" +#include "libc/stdio/rand.h" +#include "libc/stdio/stdio.h" +#include "libc/str/str.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/o.h" +#include "libc/sysv/consts/prot.h" +#include "third_party/getopt/getopt.internal.h" + +#define VERSION \ + "objbincopy v1.0\n" \ + "copyright 2023 justine tunney\n" \ + "https://github.com/jart/cosmopolitan\n" + +#define MANUAL \ + " -o OUTPUT INPUT\n" \ + "\n" \ + "DESCRIPTION\n" \ + "\n" \ + " Fast `objcopy -SO binary` that doesn't insert bloat.\n" \ + "\n" \ + " This program is for times where the unix linker is being\n" \ + " used to create executables, that define their own custom\n" \ + " executable headers. The ld program outputs such programs\n" \ + " as an executable wrapped inside an executable. Normally\n" \ + " the only way to get it out is using `objcopy -SO binary`\n" \ + " except that it has the undesirable impact of adding lots\n" \ + " of bloat to the output file, in order to make its layout\n" \ + " the same as the virtual memory layout. That's useful for\n" \ + " things like naive firmware loaders but isnt a great idea\n" \ + " when our goal is to generate files like ELF and PE which\n" \ + " support loading segments, from overlapping file regions.\n" \ + " Therefore, this program performs a naive objcopy of your\n" \ + " ELF PT_LOAD segments without considering virtual layout.\n" \ + "\n" \ + "FLAGS\n" \ + "\n" \ + " -h show usage\n" \ + " -o OUTPUT set output path\n" \ + " -m create Mach-O executable\n" \ + " -f coerce EI_OSABI to FreeBSD\n" \ + "\n" \ + "ARGUMENTS\n" \ + "\n" \ + " OUTPUT where to save the unwrapped executable\n" \ + " INPUT is an elf executable made by the unix linker\n" \ + "\n" + +#ifndef NDEBUG +#define DEBUG(...) kprintf("DEBUG: " __VA_ARGS__) +#else +#define DEBUG(...) (void)0 +#endif + +#define IsStaticStringEqual(buf, str) \ + (strnlen(buf, sizeof(buf)) == strlen(str) && !memcmp(buf, str, strlen(str))) + +static int outfd; +static bool want_macho; +static const char *prog; +static bool want_freebsd; +static const char *outpath; + +static wontreturn void Die(const char *thing, const char *reason) { + tinyprint(2, thing, ": ", reason, "\n", NULL); + exit(1); +} + +static wontreturn void DieSys(const char *thing) { + perror(thing); + exit(1); +} + +static wontreturn void ShowUsage(int rc, int fd) { + tinyprint(fd, VERSION, "\nUSAGE\n\n ", prog, MANUAL, NULL); + exit(rc); +} + +static void GetOpts(int argc, char *argv[]) { + int opt; + while ((opt = getopt(argc, argv, "hmfo:")) != -1) { + switch (opt) { + case 'o': + outpath = optarg; + break; + case 'f': + want_freebsd = true; + break; + case 'm': + want_macho = true; + break; + case 'h': + ShowUsage(0, 1); + default: + ShowUsage(1, 2); + } + } + if (!outpath) { + Die(prog, "need output path"); + } + if (optind == argc) { + Die(prog, "missing input argument"); + } +} + +static int PhdrFlagsToProt(Elf64_Word flags) { + int prot = PROT_NONE; + if (flags & PF_R) prot |= PROT_READ; + if (flags & PF_W) prot |= PROT_WRITE; + if (flags & PF_X) prot |= PROT_EXEC; + return prot; +} + +static void Write(const void *data, size_t size) { + ssize_t rc; + const char *p, *e; + for (p = data, e = p + size; p < e; p += (size_t)rc) { + if ((rc = write(outfd, p, e - p)) == -1) { + DieSys(outpath); + } + } +} + +// apple imposes very strict requirements which forbid creativity to the +// greatest possible extent. this routine is designed to help us know if +// something we've built won't be accepted by the xnu kernel. +static void ValidateMachoSection(const char *inpath, // + Elf64_Ehdr *elf, // + Elf64_Shdr *shdr, // + struct MachoHeader *macho, // + Elf64_Off filesize) { + int i; + char *end; + bool found_uuid; + bool found_segment; + uint64_t lastvaddr; + uint64_t lastoffset; + bool found_pagezero; + bool found_unixthread; + struct MachoLoadCommand *cmd; + if (!shdr) return; + if (elf->e_machine != EM_NEXGEN32E) { + Die(inpath, ".macho section only supported for ELF x86_64"); + } + if (!macho) Die(inpath, "corrupted .macho section content"); + if (shdr->sh_size < sizeof(struct MachoHeader)) { + Die(inpath, ".macho section too small for mach-o header"); + } + if (macho->magic != 0xFEEDFACE + 1) { + Die(inpath, ".macho header magic wasn't 0xFEEDFACE+1"); + } + if (macho->arch != MAC_CPU_NEXGEN32E) { + Die(inpath, "mach-o arch wasn't MAC_CPU_NEXGEN32E"); + } + if (shdr->sh_size != sizeof(struct MachoHeader) + macho->loadsize) { + Die(inpath, ".macho section size not equal to sizeof(header) + loadsize"); + } + lastvaddr = 0; + lastoffset = 0; + found_uuid = false; + found_segment = false; + found_pagezero = false; + found_unixthread = false; + end = (char *)(macho + 1) + macho->loadsize; + cmd = (struct MachoLoadCommand *)(macho + 1); + for (i = 0; i < macho->loadcount; ++i) { + if ((char *)cmd + sizeof(struct MachoLoadCommand *) > end || + (char *)cmd + cmd->size > end) { + Die(inpath, "mach-o load commands overflowed loadsize"); + } + if (cmd->command == MAC_LC_SEGMENT_64) { + size_t namelen; + struct MachoLoadSegment *loadseg; + loadseg = (struct MachoLoadSegment *)cmd; + if (loadseg->sectioncount) { + Die(inpath, "don't bother with mach-o sections"); + } + namelen = strnlen(loadseg->name, sizeof(loadseg->name)); + if (!loadseg->name) { + Die(inpath, "mach-o load segment missing name"); + } + if (filesize || (loadseg->vaddr && loadseg->memsz)) { + if (loadseg->vaddr < lastvaddr) { + Die(inpath, + "the virtual memory regions defined by mach-o load segment " + "commands aren't allowed to overlap and must be specified " + "monotonically"); + } + if (loadseg->vaddr + loadseg->memsz < loadseg->vaddr) { + Die(inpath, "mach-o segment memsz overflows"); + } + if (loadseg->filesz > loadseg->memsz) { + Die(inpath, "mach-o segment filesz exceeds memsz"); + } + lastvaddr = loadseg->vaddr + loadseg->memsz; + if (loadseg->vaddr & 4095) { + Die(inpath, "mach-o segment vaddr must be page aligned"); + } + } + if (filesize) { + if (loadseg->offset < lastoffset) { + Die(inpath, + "the file segments defined by mach-o load segment commands " + "aren't allowed to overlap and must be specified monotonically"); + } + if (loadseg->filesz > filesize) { + Die(inpath, "mach-o segment filesz exceeds file size"); + } + if (loadseg->offset + loadseg->filesz < loadseg->offset) { + Die(inpath, "mach-o segment offset + filesz overflows"); + } + if (loadseg->offset + loadseg->filesz > filesize) { + Die(inpath, "mach-o segment overlaps end of file"); + } + lastoffset = loadseg->offset + loadseg->filesz; + } + if (namelen == strlen("__PAGEZERO") && + !memcmp(loadseg->name, "__PAGEZERO", namelen)) { + found_pagezero = true; + if (i != 0) { + Die(inpath, "mach-o __PAGEZERO must be first load command"); + } + } else { + if (!found_segment) { + found_segment = true; + if (loadseg->offset) { + Die(inpath, "the first mach-o load segment (that isn't page zero) " + "must begin loading the executable from offset zero"); + } + } + } + } else if (cmd->command == MAC_LC_UUID) { + uint64_t *uuid; + found_uuid = true; + if (cmd->size != sizeof(*cmd) + 16) { + Die(inpath, "MAC_LC_UUID size wrong"); + } + uuid = (uint64_t *)(cmd + 1); + if (!uuid[0] && !uuid[1]) { + uuid[0] = _rand64(); + uuid[1] = _rand64(); + } + } else if (cmd->command == MAC_LC_UNIXTHREAD) { + uint64_t *registers; + struct MachoLoadThreadCommand *thread; + if (cmd->size != sizeof(*thread) + 21 * 8) { + Die(inpath, "MAC_LC_UNIXTHREAD size should be 4+4+4+4+21*8"); + } + thread = (struct MachoLoadThreadCommand *)cmd; + if (thread->flavor != MAC_THREAD_NEXGEN32E) { + Die(inpath, "MAC_LC_UNIXTHREAD flavor should be MAC_THREAD_NEXGEN32E"); + } + if (thread->count != 21 * 8 / 4) { + Die(inpath, "MAC_LC_UNIXTHREAD count should be 21*8/4"); + } + registers = (uint64_t *)(thread + 1); + if (!registers[16]) { + Die(inpath, "MAC_LC_UNIXTHREAD doesn't specify RIP register"); + } + found_unixthread = true; + } else { + Die(inpath, "unsupported mach-o load command"); + } + cmd = (struct MachoLoadCommand *)((char *)cmd + cmd->size); + } + if (!found_uuid) { + Die(inpath, "mach-o missing MAC_LC_UUID"); + } + if (!found_unixthread) { + Die(inpath, "mach-o missing MAC_LC_UNIXTHREAD"); + } + if (!found_pagezero) { + Die(inpath, "mach-o missing __PAGEZERO load segment command"); + } + if ((char *)cmd != end) { + Die(inpath, "mach-o loadsize greater than load commands"); + } +} + +static struct MachoLoadSegment *GetNextMachoLoadSegment( + struct MachoLoadCommand **load, int *count) { + struct MachoLoadCommand *cmd; + while (*count) { + --*count; + cmd = *load; + *load = (struct MachoLoadCommand *)((char *)cmd + cmd->size); + if (cmd->command == MAC_LC_SEGMENT_64) { + struct MachoLoadSegment *loadseg; + loadseg = (struct MachoLoadSegment *)cmd; + if (!IsStaticStringEqual(loadseg->name, "__PAGEZERO")) { + return loadseg; + } + } + } + return 0; +} + +static void HandleElf(const char *inpath, Elf64_Ehdr *elf, size_t esize) { + char *secstrs; + int i, loadcount; + Elf64_Off maxoff; + Elf64_Phdr *phdr; + char empty[64] = {0}; + Elf64_Shdr *macho_shdr; + struct MachoHeader *macho; + struct MachoLoadCommand *loadcommand; + struct MachoLoadSegment *loadsegment; + if (elf->e_type != ET_EXEC && elf->e_type != ET_DYN) { + Die(inpath, "elf binary isn't an executable"); + } + if (!(secstrs = GetElfSectionNameStringTable(elf, esize))) { + Die(inpath, "elf section name string table not found"); + } + macho_shdr = FindElfSectionByName(elf, esize, secstrs, ".macho"); + macho = GetElfSectionAddress(elf, esize, macho_shdr); + // ValidateMachoSection(inpath, elf, macho_shdr, macho, 0); + loadcommand = (struct MachoLoadCommand *)(macho + 1); + loadcount = macho->loadcount; + if (want_freebsd) { + elf->e_ident[EI_OSABI] = ELFOSABI_FREEBSD; + } + elf->e_shoff = 0; + elf->e_shnum = 0; + elf->e_shstrndx = 0; + elf->e_shentsize = 0; + for (maxoff = i = 0; i < elf->e_phnum; ++i) { + phdr = GetElfProgramHeaderAddress(elf, esize, i); + if (!phdr) Die(inpath, "corrupted elf header"); + if (phdr->p_type == PT_INTERP) Die(inpath, "PT_INTERP isn't supported"); + if (phdr->p_type == PT_DYNAMIC) Die(inpath, "PT_DYNAMIC isn't supported"); + if (!phdr->p_filesz) continue; + maxoff = MAX(maxoff, phdr->p_offset + phdr->p_filesz); + if (macho && phdr->p_type == PT_LOAD) { + if (!(loadsegment = GetNextMachoLoadSegment(&loadcommand, &loadcount))) { + Die(inpath, "there must exist a MAC_LC_SEGMENT_64 for every PT_LOAD " + "when the .macho section is defined"); + } + loadsegment->vaddr = phdr->p_vaddr; + loadsegment->memsz = phdr->p_memsz; + loadsegment->offset = phdr->p_offset; + loadsegment->filesz = phdr->p_filesz; + loadsegment->initprot |= PhdrFlagsToProt(phdr->p_flags); + if (loadsegment->initprot == PROT_EXEC) { + loadsegment->initprot |= PROT_READ; + } + loadsegment->maxprot |= loadsegment->initprot; + } + } + // ValidateMachoSection(inpath, elf, macho_shdr, macho, maxoff); + Write((char *)elf, maxoff); + if (want_macho) { + if (!macho_shdr || !macho) { + Die(inpath, "requested Mach-O output but .macho section not found"); + } + if (lseek(outfd, 0, SEEK_SET)) { + DieSys(inpath); + } + // TODO(jart): Add a check that ensures we aren't overwriting + // anything except ELF headers and the old machoo + Write((char *)elf + macho_shdr->sh_offset, macho_shdr->sh_size); + } +} + +static void HandleInput(const char *inpath) { + int infd; + void *map; + ssize_t size; + if ((infd = open(inpath, O_RDONLY)) == -1) { + DieSys(inpath); + } + if ((size = lseek(infd, 0, SEEK_END)) == -1) { + DieSys(inpath); + } + if (size) { + if ((map = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, 0)) == + MAP_FAILED) { + DieSys(inpath); + } + if (IsElf64Binary(map, size)) { + HandleElf(inpath, map, size); + } else { + Die(prog, "not an elf64 binary"); + } + if (munmap(map, size)) { + DieSys(inpath); + } + } + if (close(infd)) { + DieSys(inpath); + } +} + +int main(int argc, char *argv[]) { + int i, opt; + prog = argv[0]; + if (!prog) prog = "objbincopy"; + GetOpts(argc, argv); + if ((outfd = creat(outpath, 0755)) == -1) { + DieSys(outpath); + } + for (i = optind; i < argc; ++i) { + HandleInput(argv[i]); + } + if (close(outfd)) { + DieSys(outpath); + } +}