Make improvements

- This commit mints a new release of APE Loader v1.2 which supports
  loading ELF programs with a non-contiguous virtual address layout
  even though we've never been able to take advantage of it, due to
  how `objcopy -SO binary` fills any holes left by PT_LOAD. This'll
  change soon, since we'll have a new way of creating APE binaries.

- The undiamonding trick with our ioctl() implementation is removed
  since POSIX has been killing ioctl() for years and they've done a
  much better job. One problem this resolves is that ioctl(FIONREAD)
  wasn't working earlier, which caused issues when building Emacs.
This commit is contained in:
Justine Tunney 2023-07-11 04:29:33 -07:00
parent a1b1fdd1a4
commit 1ee2e89326
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
53 changed files with 1155 additions and 1255 deletions

View file

@ -77,12 +77,18 @@
* @note this can probably be used as a binfmt_misc interpreter
*/
/* page granularity used by the loader's address/offset alignment math */
#define PAGE_SIZE 4096
/* exclusive upper bound (2 MiB) of the low address range [0, NULL_PAGE)
   that a loaded program segment is not permitted to overlap */
#define NULL_PAGE 2097152
/* operating system identifiers; each one is a distinct bit so that it
   can be masked against SUPPORT_VECTOR */
#define LINUX 1
#define XNU 8
#define OPENBSD 16
#define FREEBSD 32
#define NETBSD 64
/* two-operand minimum / maximum; NOTE: each argument may be evaluated
   more than once, so don't pass expressions that have side effects */
#define MIN(X, Y) ((Y) > (X) ? (X) : (Y))
#define MAX(X, Y) ((Y) < (X) ? (X) : (Y))
/* nonzero when the corresponding OS bit is present in SUPPORT_VECTOR */
#define SupportsLinux() (SUPPORT_VECTOR & LINUX)
#define SupportsXnu() (SUPPORT_VECTOR & XNU)
#define SupportsFreebsd() (SUPPORT_VECTOR & FREEBSD)
@ -103,29 +109,29 @@
#define MAP_PRIVATE 2
#define MAP_FIXED 16
#define MAP_ANONYMOUS (IsLinux() ? 32 : 4096)
#define AT_EXECFN_LINUX 31
#define AT_EXECFN_NETBSD 2014
#define ELFCLASS32 1
#define ELFDATA2LSB 1
#define EM_NEXGEN32E 62
#define ET_EXEC 2
#define PT_LOAD 1
#define PT_DYNAMIC 2
#define PT_INTERP 3
#define EI_CLASS 4
#define EI_DATA 5
#define PF_X 1
#define PF_W 2
#define PF_R 4
#define AT_PHDR 3
#define AT_PHENT 4
#define AT_PHNUM 5
#define AT_EXECFN_LINUX 31
#define AT_EXECFN_NETBSD 2014
#define X_OK 1
#define XCR0_SSE 2
#define XCR0_AVX 4
#define PR_SET_MM 35
#define PR_SET_MM_EXE_FILE 13
#define Min(X, Y) ((Y) > (X) ? (X) : (Y))
#define Roundup(X, K) (((X) + (K)-1) & -(K))
#define Rounddown(X, K) ((X) & -(K))
#define Read32(S) \
((unsigned)(255 & (S)[3]) << 030 | (unsigned)(255 & (S)[2]) << 020 | \
(unsigned)(255 & (S)[1]) << 010 | (unsigned)(255 & (S)[0]) << 000)
@ -190,16 +196,14 @@ struct ApeLoader {
union ElfEhdrBuf ehdr;
union ElfPhdrBuf phdr;
struct PathSearcher ps;
char path[1024];
};
long SystemCall(long arg1, //
long arg2, //
long arg3, //
long arg4, //
long arg5, //
long arg6, //
long arg7, //
long magi);
/* Raw system call entry point. Only the declaration is visible here; the
   definition lives elsewhere (presumably an assembly stub -- confirm).
   The wrappers in this file pass their operands in arg1..arg7, padding
   unused slots with 0, and use the final `magi` argument to select the
   operation, whose value differs per operating system (e.g. Mprotect
   passes 10 on Linux and 74 otherwise). */
long SystemCall(long arg1, long arg2, long arg3, long arg4, long arg5,
long arg6, long arg7, long magi);
extern char __executable_start[];
extern char _end[];
static int ToLower(int c) {
return 'A' <= c && c <= 'Z' ? c + ('a' - 'A') : c;
@ -300,7 +304,7 @@ static long Pread(int fd, void *data, unsigned long size, long off, int os) {
} else if (IsFreebsd()) {
magi = 0x1db;
} else if (IsOpenbsd()) {
magi = 0x0a9; // OpenBSD v7.3+
magi = 0x0a9; /* OpenBSD v7.3+ */
} else if (IsNetbsd()) {
magi = 0x0ad;
} else {
@ -337,6 +341,11 @@ static int Open(const char *path, int flags, int mode, int os) {
(IsLinux() ? 2 : 5) | (IsXnu() ? 0x2000000 : 0));
}
/* Changes the protection of a memory region by issuing the mprotect
   request through SystemCall().

   addr  start of the region
   size  length of the region in bytes
   prot  new protection bits
   os    detected operating system, consulted by IsLinux() / IsXnu()

   Returns whatever SystemCall() hands back, narrowed to int; callers in
   this file treat a negative value as failure. */
static int Mprotect(void *addr, unsigned long size, int prot, int os) {
  long magi;
  /* request number is 10 on Linux and 74 everywhere else */
  if (IsLinux()) {
    magi = 10;
  } else {
    magi = 74;
  }
  /* on XNU the number additionally carries the 0x2000000 bits */
  if (IsXnu()) {
    magi |= 0x2000000;
  }
  return SystemCall((long)addr, size, prot, 0, 0, 0, 0, magi);
}
static long Mmap(void *addr, unsigned long size, int prot, int flags, int fd,
long off, int os) {
long magi;
@ -347,7 +356,7 @@ static long Mmap(void *addr, unsigned long size, int prot, int flags, int fd,
} else if (IsFreebsd()) {
magi = 477;
} else if (IsOpenbsd()) {
magi = 49; // OpenBSD v7.3+
magi = 49; /* OpenBSD v7.3+ */
} else if (IsNetbsd()) {
magi = 197;
} else {
@ -480,15 +489,26 @@ __attribute__((__noreturn__)) static void Spawn(int os, const char *exe, int fd,
long *sp, struct ElfEhdr *e,
struct ElfPhdr *p) {
long rc;
int prot, flags;
int prot;
int flags;
int found_code;
int found_entry;
long code, codesize;
unsigned long a, b, i;
unsigned long a, b, c, d, i, j;
/* load elf */
code = 0;
codesize = 0;
for (i = e->e_phnum; i--;) {
found_code = 0;
found_entry = 0;
for (i = 0; i < e->e_phnum; ++i) {
/* validate program header */
if (p[i].p_type == PT_INTERP) {
Pexit(os, exe, 0, "ELF has PT_INTERP which is unsupported");
}
if (p[i].p_type == PT_DYNAMIC) {
Pexit(os, exe, 0, "not a static executable");
Pexit(os, exe, 0, "ELF has PT_DYNAMIC which is unsupported");
}
if (p[i].p_type != PT_LOAD) {
continue;
@ -496,12 +516,39 @@ __attribute__((__noreturn__)) static void Spawn(int os, const char *exe, int fd,
if (!p[i].p_memsz) {
continue;
}
if (p[i].p_vaddr & 4095) {
Pexit(os, exe, 0, "APE phdr addr must be 4096-aligned");
if (p[i].p_filesz > p[i].p_memsz) {
Pexit(os, exe, 0, "ELF phdr filesz exceeds memsz");
}
if (p[i].p_offset & 4095) {
Pexit(os, exe, 0, "APE phdr offset must be 4096-aligned");
if ((p[i].p_vaddr & (PAGE_SIZE - 1)) != (p[i].p_offset & (PAGE_SIZE - 1))) {
Pexit(os, exe, 0, "ELF phdr virt/off skew mismatch w.r.t. pagesize");
}
if (p[i].p_vaddr + p[i].p_memsz < p[i].p_vaddr ||
p[i].p_vaddr + p[i].p_memsz + (PAGE_SIZE - 1) < p[i].p_vaddr) {
Pexit(os, exe, 0, "ELF phdr vaddr+memsz overflow");
}
if (p[i].p_vaddr + p[i].p_filesz < p[i].p_vaddr ||
p[i].p_vaddr + p[i].p_filesz + (PAGE_SIZE - 1) < p[i].p_vaddr) {
Pexit(os, exe, 0, "ELF phdr vaddr+files overflow");
}
a = p[i].p_vaddr & -PAGE_SIZE;
b = (p[i].p_vaddr + p[i].p_memsz + (PAGE_SIZE - 1)) & -PAGE_SIZE;
if (MAX(a, 0) < MIN(b, NULL_PAGE)) {
Pexit(os, exe, 0, "ELF overlaps NULL page");
}
if (MAX(a, (unsigned long)__executable_start) <
MIN(b, (unsigned long)_end)) {
Pexit(os, exe, 0, "ELF overlaps your APE loader");
}
for (j = i + 1; j < e->e_phnum; ++j) {
if (p[j].p_type != PT_LOAD) continue;
c = p[j].p_vaddr & -PAGE_SIZE;
d = (p[j].p_vaddr + p[j].p_memsz + (PAGE_SIZE - 1)) & -PAGE_SIZE;
if (MAX(a, c) < MIN(b, d)) {
Pexit(os, exe, 0, "ELF overlaps its own vaspace");
}
}
/* configure mapping */
prot = 0;
flags = MAP_FIXED | MAP_PRIVATE;
if (p[i].p_flags & PF_R) {
@ -512,47 +559,76 @@ __attribute__((__noreturn__)) static void Spawn(int os, const char *exe, int fd,
}
if (p[i].p_flags & PF_X) {
prot |= PROT_EXEC;
code = p[i].p_vaddr;
codesize = p[i].p_filesz;
if (!found_code) {
code = p[i].p_vaddr;
codesize = p[i].p_filesz;
}
if (p[i].p_vaddr <= e->e_entry &&
e->e_entry < p[i].p_vaddr + p[i].p_memsz) {
found_entry = 1;
}
}
/* load from file */
if (p[i].p_filesz) {
if ((rc = Mmap((void *)p[i].p_vaddr, p[i].p_filesz, prot, flags, fd,
p[i].p_offset, os)) < 0) {
Pexit(os, exe, rc, "image mmap");
void *addr;
unsigned long size;
int dirty, prot1, prot2;
dirty = 0;
prot1 = prot;
prot2 = prot;
a = p[i].p_vaddr + p[i].p_filesz;
b = (a + (PAGE_SIZE - 1)) & -PAGE_SIZE;
c = p[i].p_vaddr + p[i].p_memsz;
if (b > c) b = c;
if (c > b) {
dirty = 1;
if (~prot1 & PROT_WRITE) {
prot1 = PROT_READ | PROT_WRITE;
}
}
if ((a = Min(-p[i].p_filesz & 4095, p[i].p_memsz - p[i].p_filesz))) {
MemSet((void *)(p[i].p_vaddr + p[i].p_filesz), 0, a);
addr = (void *)(p[i].p_vaddr & -PAGE_SIZE);
size = (p[i].p_vaddr & (PAGE_SIZE - 1)) + p[i].p_filesz;
rc = Mmap(addr, size, prot1, flags, fd, p[i].p_offset & -PAGE_SIZE, os);
if (rc < 0) Pexit(os, exe, rc, "prog mmap");
if (dirty) MemSet((void *)a, 0, b - a);
if (prot2 != prot1) {
rc = Mprotect(addr, size, prot2, os);
if (rc < 0) Pexit(os, exe, rc, "prog mprotect");
}
}
if ((b = Roundup(p[i].p_memsz, 4096)) >
(a = Roundup(p[i].p_filesz, 4096))) {
if ((rc = Mmap((void *)p[i].p_vaddr + a, b - a, prot,
flags | MAP_ANONYMOUS, -1, 0, os)) < 0) {
Pexit(os, exe, rc, "bss mmap");
}
/* allocate extra bss */
a = p[i].p_vaddr + p[i].p_filesz;
a = (a + (PAGE_SIZE - 1)) & -PAGE_SIZE;
b = p[i].p_vaddr + p[i].p_memsz;
if (b > a) {
rc = Mmap((void *)a, b - a, prot, flags | MAP_ANONYMOUS, 0, 0, os);
if (rc < 0) Pexit(os, exe, rc, "bss mmap");
}
}
if (!code) {
Pexit(os, exe, 0, "ELF needs PT_LOAD phdr w/ PF_X");
}
/* finish up */
if (!found_entry) {
Pexit(os, exe, 0, "ELF entrypoint not found in PT_LOAD with PF_X");
}
Close(fd, os);
Msyscall(code, codesize, os);
// we clear all the general registers we can to have some wiggle room
// to extend the behavior of this loader in the future. we don't need
// to clear the xmm registers since the ape loader should be compiled
// with the -mgeneral-regs-only flag.
/* we clear all the general registers we can to have some wiggle room
to extend the behavior of this loader in the future. we don't need
to clear the xmm registers since the ape loader should be compiled
with the -mgeneral-regs-only flag. */
asm volatile("xor\t%%eax,%%eax\n\t"
"xor\t%%r8d,%%r8d\n\t"
"xor\t%%r9d,%%r9d\n\t"
"xor\t%%r10d,%%r10d\n\t"
"xor\t%%r11d,%%r11d\n\t"
"xor\t%%ebx,%%ebx\n\t" // netbsd doesnt't clear this
"xor\t%%r12d,%%r12d\n\t" // netbsd doesnt't clear this
"xor\t%%r13d,%%r13d\n\t" // netbsd doesnt't clear this
"xor\t%%r14d,%%r14d\n\t" // netbsd doesnt't clear this
"xor\t%%r15d,%%r15d\n\t" // netbsd doesnt't clear this
"xor\t%%ebx,%%ebx\n\t" /* netbsd doesnt't clear this */
"xor\t%%r12d,%%r12d\n\t" /* netbsd doesnt't clear this */
"xor\t%%r13d,%%r13d\n\t" /* netbsd doesnt't clear this */
"xor\t%%r14d,%%r14d\n\t" /* netbsd doesnt't clear this */
"xor\t%%r15d,%%r15d\n\t" /* netbsd doesnt't clear this */
"mov\t%%rdx,%%rsp\n\t"
"xor\t%%edx,%%edx\n\t"
"push\t%%rsi\n\t"
@ -566,16 +642,30 @@ __attribute__((__noreturn__)) static void Spawn(int os, const char *exe, int fd,
}
static void TryElf(struct ApeLoader *M, const char *exe, int fd, long *sp,
int os) {
unsigned size;
if (Read32(M->ehdr.buf) == Read32("\177ELF") && //
M->ehdr.ehdr.e_type == ET_EXEC && //
M->ehdr.ehdr.e_machine == EM_NEXGEN32E && //
M->ehdr.ehdr.e_ident[EI_CLASS] != ELFCLASS32 && //
M->ehdr.ehdr.e_phentsize >= sizeof(M->phdr.phdr) && //
(size = (unsigned)M->ehdr.ehdr.e_phnum * M->ehdr.ehdr.e_phentsize) <=
sizeof(M->phdr.buf) &&
long *auxv, int os) {
unsigned size = M->ehdr.ehdr.e_phnum;
if (Read32(M->ehdr.buf) == Read32("\177ELF") &&
M->ehdr.ehdr.e_type == ET_EXEC &&
M->ehdr.ehdr.e_machine == EM_NEXGEN32E &&
M->ehdr.ehdr.e_ident[EI_CLASS] != ELFCLASS32 &&
M->ehdr.ehdr.e_phentsize >= sizeof(M->phdr.phdr) &&
(size *= M->ehdr.ehdr.e_phentsize) <= sizeof(M->phdr.buf) &&
Pread(fd, M->phdr.buf, size, M->ehdr.ehdr.e_phoff, os) == size) {
for (; *auxv; auxv += 2) {
switch (*auxv) {
case AT_PHDR:
auxv[1] = (unsigned long)&M->phdr;
break;
case AT_PHENT:
auxv[1] = M->ehdr.ehdr.e_phentsize;
break;
case AT_PHNUM:
auxv[1] = M->ehdr.ehdr.e_phnum;
break;
default:
break;
}
}
Spawn(os, exe, fd, sp, &M->ehdr.ehdr, &M->phdr.phdr);
}
}
@ -588,7 +678,7 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) {
long *auxv, *ap, *ew;
char *p, *exe, *prog, **argv, **envp;
// detect freebsd
/* detect freebsd */
if (SupportsXnu() && dl == XNU) {
os = XNU;
} else if (SupportsFreebsd() && di) {
@ -598,7 +688,7 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) {
os = 0;
}
// extract arguments
/* extract arguments */
argc = *sp;
argv = (char **)(sp + 1);
envp = (char **)(sp + 1 + argc + 1);
@ -609,12 +699,12 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) {
}
}
// detect openbsd
/* detect openbsd */
if (SupportsOpenbsd() && !os && !auxv[0]) {
os = OPENBSD;
}
// detect netbsd and find end of words
/* detect netbsd and find end of words */
for (ap = auxv; ap[0]; ap += 2) {
if (SupportsNetbsd() && !os && ap[0] == AT_EXECFN_NETBSD) {
os = NETBSD;
@ -622,23 +712,23 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) {
}
ew = ap + 1;
// allocate loader memory
/* allocate loader memory */
n = sizeof(*M) / sizeof(long);
MemMove(sp - n, sp, (char *)ew - (char *)sp);
sp -= n, argv -= n, envp -= n, auxv -= n;
M = (struct ApeLoader *)(ew - n);
// default operating system
/* default operating system */
if (!os) {
os = LINUX;
}
// we can load via shell, shebang, or binfmt_misc
/* we can load via shell, shebang, or binfmt_misc */
if (argc >= 3 && !StrCmp(argv[1], "-")) {
// if the first argument is a hyphen then we give the user the
// power to change argv[0] or omit it entirely. most operating
// systems don't permit the omission of argv[0] but we do, b/c
// it's specified by ANSI X3.159-1988.
/* if the first argument is a hyphen then we give the user the
power to change argv[0] or omit it entirely. most operating
systems don't permit the omission of argv[0] but we do, b/c
it's specified by ANSI X3.159-1988. */
prog = (char *)sp[3];
argc = sp[3] = sp[0] - 3;
argv = (char **)((sp += 3) + 1);
@ -646,7 +736,7 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) {
Print(os, 2,
"usage: ape PROG [ARGV1,ARGV2,...]\n"
" ape - PROG [ARGV0,ARGV1,...]\n"
"αcτµαlly pδrταblε εxεcµταblε loader v1.1\n"
"αcτµαlly pδrταblε εxεcµταblε loader v1.2\n"
"copyright 2022 justine alexandra roberts tunney\n"
"https://justine.lol/ape.html\n",
0l);
@ -657,7 +747,7 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) {
argv = (char **)((sp += 1) + 1);
}
// resolve path of executable and read its first page
/* resolve path of executable and read its first page */
if (!(exe = Commandv(&M->ps, os, prog, GetEnv(envp, "PATH")))) {
Pexit(os, prog, 0, "not found (maybe chmod +x)");
} else if ((fd = Open(exe, O_RDONLY, 0, os)) < 0) {
@ -668,17 +758,17 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) {
Pexit(os, exe, 0, "too small");
}
// change argv[0] to resolved path if it's ambiguous
/* change argv[0] to resolved path if it's ambiguous */
if (argc > 0 && *prog != '/' && *exe == '/' && !StrCmp(prog, argv[0])) {
argv[0] = exe;
}
// ape intended behavior
// 1. if file is a native executable, try to run it natively
// 2. if ape, will scan shell script for elf printf statements
// 3. shell script may have multiple lines producing elf headers
// 4. all elf printf lines must exist in the first 4096 bytes of file
// 5. elf program headers may appear anywhere in the binary
/* ape intended behavior
1. if file is a native executable, try to run it natively
2. if ape, will scan shell script for elf printf statements
3. shell script may have multiple lines producing elf headers
4. all elf printf lines must exist in the first 4096 bytes of file
5. elf program headers may appear anywhere in the binary */
if ((IsXnu() && Read32(M->ehdr.buf) == 0xFEEDFACE + 1) ||
(!IsXnu() && Read32(M->ehdr.buf) == Read32("\177ELF"))) {
Close(fd, os);
@ -708,10 +798,10 @@ __attribute__((__noreturn__)) void ApeLoader(long di, long *sp, char dl) {
M->ehdr.buf[i++] = c;
}
if (i >= sizeof(M->ehdr.ehdr)) {
TryElf(M, exe, fd, sp, os);
TryElf(M, exe, fd, sp, auxv, os);
}
}
}
TryElf(M, exe, fd, sp, os);
TryElf(M, exe, fd, sp, auxv, os);
Pexit(os, exe, 0, "Not an acceptable APE/ELF executable for x86-64");
}