Introduce new linker for fat ape binaries

This commit is contained in:
Justine Tunney 2023-08-11 04:37:23 -07:00
parent e3c456d23a
commit 0105e3e2b6
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
44 changed files with 3140 additions and 867 deletions

View file

@ -20,13 +20,16 @@
#include "libc/calls/calls.h"
#include "libc/calls/struct/stat.h"
#include "libc/dce.h"
#include "libc/elf/def.h"
#include "libc/elf/struct/ehdr.h"
#include "libc/fmt/conv.h"
#include "libc/intrin/bits.h"
#include "libc/intrin/kprintf.h"
#include "libc/log/check.h"
#include "libc/limits.h"
#include "libc/macho.internal.h"
#include "libc/macros.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/stdckdint.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/msync.h"
@ -35,85 +38,129 @@
#include "third_party/getopt/getopt.internal.h"
#include "third_party/regex/regex.h"
__static_yoink("strerror_wr");
// Version banner: printed on its own for -v, and printed ahead of the usage
// text for -h and for unrecognized flags.
#define VERSION \
"actually portable executable assimilate v1.6\n" \
"copyright 2023 justine alexandra roberts tunney\n"
// options used: fhem
// letters not used: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdgijklnopqrstuvwxyz
// digits not used: 0123456789
// puncts not used: !"#$%&'()*+,-./;<=>@[\]^_`{|}~
// letters duplicated: none
#define GETOPTS "fem"
// Help text describing the command line flags of this program.
//
// The synopsis enumerates every flag described below it, including the
// -x / -a architecture overrides, so the summary line and the per-flag
// descriptions agree with each other.
#define USAGE \
"usage: assimilate.com [-hvfemxa] COMFILE...\n" \
" -h show help\n" \
" -v show version\n" \
" -f ignore soft errors\n" \
" -e convert to elf regardless of host os\n" \
" -m convert to macho regardless of host os\n" \
" -x convert to amd64 regardless of host cpu\n" \
" -a convert to arm64 regardless of host cpu\n"
#define USAGE \
"\
Usage: assimilate.com [-hfem] COMFILE...\n\
-h show help\n\
-e force elf\n\
-m force macho\n\
-f continue on soft errors\n\
\n\
αcτµαlly pδrταblε εxεcµταblε assimilate v1.o\n\
copyright 2022 justine alexandra roberts tunney\n\
https://twitter.com/justinetunney\n\
https://linkedin.com/in/jtunney\n\
https://justine.lol/ape.html\n\
https://github.com/jart\n\
\n\
This program converts Actually Portable Executable files so they're\n\
in the platform-local executable format, rather than the multiplatform\n\
APE shell script format. This is useful on UNIX operating systems when\n\
you want to use your APE programs as script interpreter or for setuid.\n\
"
// Target CPU selectors held in g_arch. ARCH_NATIVE is the zero default, which
// GetOpts replaces with the architecture this program was compiled for; the
// -x and -a flags pick ARCH_AMD64 and ARCH_ARM64 respectively.
#define ARCH_NATIVE 0
#define ARCH_AMD64 1
#define ARCH_ARM64 2
// Output format selectors assigned to g_mode.
#define MODE_NATIVE 0
#define MODE_ELF 1
#define MODE_MACHO 2
#define MODE_PE 3
// Output format selectors assigned to g_format. FORMAT_NATIVE is the zero
// default, which GetOpts replaces according to the host operating system; the
// -e and -m flags pick FORMAT_ELF and FORMAT_MACHO respectively.
#define FORMAT_NATIVE 0
#define FORMAT_ELF 1
#define FORMAT_MACHO 2
#define FORMAT_PE 3
int g_mode;
bool g_force;
int exitcode;
const char *prog;
char errstr[128];
static int g_arch;
static int g_format;
static bool g_force;
static const char *prog;
static const char *path;
static char errstr[128];
static bool got_format_flag;
void GetOpts(int argc, char *argv[]) {
// Reports a fatal error as "<thing>: <reason>" on file descriptor 2 and exits
// with status 1.
//
// When the user didn't pass an explicit format flag, a parenthetical naming
// the host's native executable format is appended to the message, since the
// output format was picked from the host os in that case.
static wontreturn void Die(const char *thing, const char *reason) {
  const char *hint = "";
  if (!got_format_flag) {
    if (IsXnu()) {
      hint = " (the host os uses macho natively)";
    } else if (IsLinux() || IsFreebsd() || IsNetbsd() || IsOpenbsd()) {
      hint = " (the host os uses elf natively)";
    } else {
      hint = " (the host os uses pe natively)";
    }
  }
  tinyprint(2, thing, ": ", reason, hint, "\n", NULL);
  exit(1);
}
// Reports the most recent system error for `thing` through perror() and then
// exits with status 1.
static wontreturn void DieSys(const char *thing) {
  perror(thing);
  exit(1);
}
// Parses the decimal number at the start of `s` and returns it as an int.
//
// The number is parsed with strtol() rather than atoi(), because atoi() has
// undefined behavior when the value doesn't fit in an int, so comparing its
// result against INT_MAX afterwards can't be relied upon to catch overflow.
// strtol() saturates at LONG_MAX / LONG_MIN instead, which lets one range
// check cover both genuine overflow and values merely too large for an int.
// INT_MAX itself is still treated as overflow, as it was before.
//
// On an out-of-range value this calls Die() with the current `path`, which
// exits the process rather than returning.
static int Atoi(const char *s) {
  long x = strtol(s, NULL, 10);
  if (x >= INT_MAX || x < INT_MIN) {
    Die(path, "integer overflow parsing ape macho dd argument");
  }
  return (int)x;
}
static void GetOpts(int argc, char *argv[]) {
int opt;
while ((opt = getopt(argc, argv, GETOPTS)) != -1) {
while ((opt = getopt(argc, argv, "hvfemxa")) != -1) {
switch (opt) {
case 'f':
g_force = true;
break;
case 'e':
g_mode = MODE_ELF;
g_format = FORMAT_ELF;
got_format_flag = true;
break;
case 'm':
g_mode = MODE_MACHO;
g_format = FORMAT_MACHO;
got_format_flag = true;
break;
case 'x':
g_arch = ARCH_AMD64;
break;
case 'a':
g_arch = ARCH_ARM64;
break;
case 'v':
tinyprint(1, VERSION, NULL);
exit(0);
case 'h':
write(1, USAGE, sizeof(USAGE) - 1);
tinyprint(1, VERSION, USAGE, NULL);
exit(0);
default:
write(2, USAGE, sizeof(USAGE) - 1);
exit(64);
tinyprint(2, VERSION, USAGE, NULL);
exit(1);
}
}
if (g_mode == MODE_NATIVE) {
if (optind == argc) {
Die(prog, "missing operand");
}
if (g_format == FORMAT_NATIVE) {
if (IsXnu()) {
g_mode = MODE_MACHO;
g_format = FORMAT_MACHO;
} else if (IsLinux() || IsFreebsd() || IsNetbsd() || IsOpenbsd()) {
g_mode = MODE_ELF;
g_format = FORMAT_ELF;
} else {
g_mode = MODE_PE;
g_format = FORMAT_PE;
}
}
if (g_arch == ARCH_NATIVE) {
#ifdef __aarch64__
g_arch = ARCH_ARM64;
#else
g_arch = ARCH_AMD64;
#endif
}
if (g_format == FORMAT_PE && g_arch == ARCH_ARM64) {
Die(prog, "native arm64 on windows not supported yet");
}
}
void GetElfHeader(char ehdr[hasatleast 64], const char *image, size_t n) {
char *p;
static void GetElfHeader(char ehdr[hasatleast 64], const char *image,
size_t n) {
int c, i;
for (p = image; p < image + MIN(n, 4096); ++p) {
char *p, *e;
for (p = image, e = p + MIN(n, 8192); p < e; ++p) {
TryAgain:
if (READ64LE(p) != READ64LE("printf '")) continue;
for (i = 0, p += 8; p + 3 < image + MIN(n, 4096) && (c = *p++) != '\'';) {
for (i = 0, p += 8; p + 3 < e && (c = *p++) != '\'';) {
if (c == '\\') {
if ('0' <= *p && *p <= '7') {
c = *p++ - '0';
@ -130,29 +177,35 @@ void GetElfHeader(char ehdr[hasatleast 64], const char *image, size_t n) {
if (i < 64) {
ehdr[i++] = c;
} else {
kprintf("%s: ape printf elf header too long\n", prog);
exit(1);
goto TryAgain;
}
}
if (i != 64) {
kprintf("%s: ape printf elf header too short\n", prog);
exit(2);
}
if (READ32LE(ehdr) != READ32LE("\177ELF")) {
kprintf("%s: ape printf elf header didn't have elf magic\n", prog);
exit(3);
if (i != 64 || //
READ32LE(ehdr) != READ32LE("\177ELF") || //
ehdr[EI_CLASS] == ELFCLASS32 || //
READ16LE(ehdr + 18) !=
(g_arch == ARCH_AMD64 ? EM_NEXGEN32E : EM_AARCH64)) {
goto TryAgain;
}
return;
}
kprintf("%s: printf statement not found in first 4096 bytes\n", prog);
exit(4);
switch (g_arch) {
case ARCH_AMD64:
Die(path, "printf statement not found in first 8192 bytes of image "
"containing elf64 ehdr for amd64");
case ARCH_ARM64:
Die(path, "printf statement not found in first 8192 bytes of image "
"containing elf64 ehdr for arm64");
default:
__builtin_unreachable();
}
}
void GetMachoPayload(const char *image, size_t imagesize, int *out_offset,
int *out_size) {
static void GetMachoPayload(const char *image, size_t imagesize,
int *out_offset, int *out_size) {
regex_t rx;
const char *script;
regmatch_t rm[1 + 3] = {0};
regmatch_t rm[1 + 13] = {0};
int rc, skip, count, bs, offset, size;
if ((script = memmem(image, imagesize, "'\n#'\"\n", 6))) {
@ -160,147 +213,239 @@ void GetMachoPayload(const char *image, size_t imagesize, int *out_offset,
} else if ((script = memmem(image, imagesize, "#'\"\n", 4))) {
script += 4;
} else {
kprintf("%s: ape shell script not found\n", prog);
exit(5);
Die(path, "ape shell script not found");
}
DCHECK_EQ(REG_OK, regcomp(&rx,
"bs=\"?\\$?(*([ [:digit:]]+))*\"? "
"skip=\"?\\$?(*([ [:digit:]]+))*\"? "
"count=\"?\\$?(*([ [:digit:]]+))*\"?",
REG_EXTENDED));
rc = regexec(&rx, script, 4, rm, 0);
// the ape shell script has always historically used `dd` to
// assimilate binaries to the mach-o file format but we have
// formatted the arguments in a variety of different ways eg
//
// - `arg=" 9293"` is how we originally had ape do it
// - `arg=$(( 9293))` b/c busybox sh disliked quoted space
// - `arg=9293 ` is generated by modern apelink.com program
//
unassert(regcomp(&rx,
"bs=" // dd block size arg
"(['\"] *)?" // #1 optional quote w/ space
"(\\$\\(\\( *)?" // #2 optional math w/ space
"([[:digit:]]+)" // #3
"( *\\)\\))?" // #4 optional math w/ space
"( *['\"])?" // #5 optional quote w/ space
" +" //
"skip=" // dd skip arg
"(['\"] *)?" // #6 optional quote w/ space
"(\\$\\(\\( *)?" // #7 optional math w/ space
"([[:digit:]]+)" // #8
"( *\\)\\))?" // #9 optional math w/ space
"( *['\"])?" // #10 optional quote w/ space
" +" //
"count=" // dd count arg
"(['\"] *)?" // #11 optional quote w/ space
"(\\$\\(\\( *)?" // #12 optional math w/ space
"([[:digit:]]+)", // #13
REG_EXTENDED) == REG_OK);
int i = 0;
TryAgain:
rc = regexec(&rx, script + i, 1 + 13, rm, 0);
if (rc != REG_OK) {
if (rc == REG_NOMATCH) {
kprintf("%s: ape macho dd command not found\n", prog);
exit(6);
unassert(rc == REG_NOMATCH);
switch (g_arch) {
case ARCH_AMD64:
Die(path, "ape macho dd command for amd64 not found");
case ARCH_ARM64:
Die(path, "ape macho dd command for arm64 not found");
default:
__builtin_unreachable();
}
regerror(rc, &rx, errstr, sizeof(errstr));
kprintf("%s: ape macho dd regex failed: %s\n", prog, errstr);
exit(7);
}
bs = atoi(script + rm[1].rm_so);
skip = atoi(script + rm[2].rm_so);
count = atoi(script + rm[3].rm_so);
if (ckd_mul(&offset, skip, bs) || ckd_mul(&size, count, bs)) {
kprintf("%s: integer overflow parsing macho\n");
exit(8);
i += rm[13].rm_eo;
bs = Atoi(script + rm[3].rm_so);
skip = Atoi(script + rm[8].rm_so);
count = Atoi(script + rm[13].rm_so);
if (ckd_mul(&offset, skip, bs)) {
Die(path, "integer overflow computing ape macho dd offset");
}
if (ckd_mul(&size, count, bs)) {
Die(path, "integer overflow computing ape macho dd size");
}
if (offset < 64) {
kprintf("%s: ape macho dd offset should be ≥64: %d\n", prog, offset);
exit(9);
Die(path, "ape macho dd offset must be at least 64");
}
if (offset >= imagesize) {
kprintf("%s: ape macho dd offset is outside file: %d\n", prog, offset);
exit(10);
Die(path, "ape macho dd offset points outside image");
}
if (size < 32) {
kprintf("%s: ape macho dd size should be ≥32: %d\n", prog, size);
exit(11);
Die(path, "ape macho dd size must be at least 32");
}
if (size > imagesize - offset) {
kprintf("%s: ape macho dd size is outside file: %d\n", prog, size);
exit(12);
Die(path, "ape macho dd size overlaps end of image");
exit(1);
}
if (READ32LE(image + offset) != 0xFEEDFACE + 1 ||
READ32LE(image + offset + 4) !=
(g_arch == ARCH_AMD64 ? MAC_CPU_NEXGEN32E : MAC_CPU_ARM64)) {
goto TryAgain;
}
*out_offset = offset;
*out_size = size;
regfree(&rx);
}
void AssimilateElf(char *p, size_t n) {
// Turns the mapped image at `p` (of `n` bytes) into an ELF file in place.
//
// The 64-byte ELF header produced by GetElfHeader() is copied over the first
// 64 bytes of the image, and the first 4096 bytes of the mapping are then
// synchronously flushed with msync().
static void AssimilateElf(char *p, size_t n) {
  char header[64];
  GetElfHeader(header, p, n);
  memcpy(p, header, sizeof(header));
  msync(p, 4096, MS_SYNC);
}
void AssimilateMacho(char *p, size_t n) {
// Turns the mapped image at `p` (of `n` bytes) into a Mach-O file in place.
//
// GetMachoPayload() reports where the embedded payload lives inside the image
// and how long it is; that payload is then slid down to the very start of the
// image (the regions may overlap, hence memmove) and the whole mapping is
// synchronously flushed with msync().
static void AssimilateMacho(char *p, size_t n) {
  int payload_offset;
  int payload_size;
  GetMachoPayload(p, n, &payload_offset, &payload_size);
  memmove(p, p + payload_offset, payload_size);
  msync(p, n, MS_SYNC);
}
void Assimilate(void) {
static void Assimilate(void) {
int fd;
char *p;
struct stat st;
if ((fd = open(prog, O_RDWR)) == -1) {
kprintf("%s: open(O_RDWR) failed: %m\n", prog);
exit(13);
}
if (fstat(fd, &st) == -1) {
kprintf("%s: fstat() failed: %m\n", prog);
exit(14);
}
if (st.st_size < 64) {
kprintf("%s: ape binaries must be at least 64 bytes\n", prog);
exit(15);
}
if ((p = mmap(0, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)) ==
MAP_FAILED) {
kprintf("%s: mmap failed: %m\n", prog);
exit(16);
}
if (g_mode == MODE_PE) {
if (READ16LE(p) == READ16LE("MZ")) {
if (!g_force) {
kprintf("%s: program is already an elf binary\n", prog);
if (g_mode != MODE_ELF) {
exitcode = 1;
}
}
goto Finish;
} else {
kprintf("%s: currently cannot back-convert to pe\n", prog);
exit(17);
}
}
ssize_t size;
if ((fd = open(path, O_RDWR)) == -1) DieSys(path);
if ((size = lseek(fd, 0, SEEK_END)) == -1) DieSys(path);
if (size < 64) Die(path, "ape executables must be at least 64 bytes");
p = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (p == MAP_FAILED) DieSys(path);
if (READ32LE(p) == READ32LE("\177ELF")) {
if (!g_force) {
kprintf("%s: program is already an elf binary\n", prog);
if (g_mode != MODE_ELF) {
exitcode = 1;
}
Elf64_Ehdr *ehdr;
switch (g_format) {
case FORMAT_ELF:
ehdr = (Elf64_Ehdr *)p;
if (ehdr->e_ident[EI_CLASS] == ELFCLASS32) {
Die(path, "32-bit elf not supported");
}
switch (g_arch) {
case ARCH_AMD64:
switch (ehdr->e_machine) {
case EM_NEXGEN32E:
if (g_force) {
exit(0);
} else {
Die(path, "already an elf amd64 executable");
}
case EM_AARCH64:
Die(path, "can't assimilate elf arm64 to elf amd64");
default:
Die(path, "elf has unsupported architecture");
}
case ARCH_ARM64:
switch (ehdr->e_machine) {
case EM_AARCH64:
if (g_force) {
exit(0);
} else {
Die(path, "already an elf arm64 executable");
}
case EM_NEXGEN32E:
Die(path, "can't assimilate elf amd64 to elf arm64");
default:
Die(path, "elf has unsupported architecture");
}
default:
__builtin_unreachable();
}
case FORMAT_MACHO:
Die(path, "can't assimilate elf to macho");
case FORMAT_PE:
Die(path, "can't assimilate elf to pe (try elf2pe.com)");
default:
__builtin_unreachable();
}
goto Finish;
}
if (READ32LE(p) == 0xFEEDFACE + 1) {
if (!g_force) {
kprintf("%s: program is already a mach-o binary\n", prog);
if (g_mode != MODE_MACHO) {
exitcode = 1;
}
struct MachoHeader *macho;
switch (g_format) {
case FORMAT_MACHO:
macho = (struct MachoHeader *)p;
switch (g_arch) {
case ARCH_AMD64:
switch (macho->arch) {
case MAC_CPU_NEXGEN32E:
if (g_force) {
exit(0);
} else {
Die(path, "already a macho amd64 executable");
}
case MAC_CPU_ARM64:
Die(path, "can't assimilate macho arm64 to macho amd64");
default:
Die(path, "macho has unsupported architecture");
}
case ARCH_ARM64:
switch (macho->arch) {
case MAC_CPU_ARM64:
if (g_force) {
exit(0);
} else {
Die(path, "already a macho arm64 executable");
}
case MAC_CPU_NEXGEN32E:
Die(path, "can't assimilate macho amd64 to macho arm64");
default:
Die(path, "macho has unsupported architecture");
}
default:
__builtin_unreachable();
}
case FORMAT_ELF:
Die(path, "can't assimilate macho to elf");
case FORMAT_PE:
Die(path, "can't assimilate macho to pe");
default:
__builtin_unreachable();
}
goto Finish;
}
if (READ64LE(p) != READ64LE("MZqFpD='")) {
kprintf("%s: this file is not an actually portable executable\n", prog);
exit(17);
if (READ64LE(p) != READ64LE("MZqFpD='") && //
READ64LE(p) != READ64LE("jartsr='") && //
READ64LE(p) != READ64LE("APEDBG='")) {
Die(path, "not an actually portable executable");
}
if (g_mode == MODE_ELF) {
AssimilateElf(p, st.st_size);
} else if (g_mode == MODE_MACHO) {
AssimilateMacho(p, st.st_size);
if (g_format == FORMAT_PE) {
if (READ16LE(p) == READ16LE("MZ")) {
if (g_force) {
exit(0);
} else {
Die(path, "this ape file is already a pe file");
}
} else {
Die(path, "this ape file was built without pe support");
}
}
Finish:
if (munmap(p, st.st_size) == -1) {
kprintf("%s: munmap() failed: %m\n", prog);
exit(18);
if (g_format == FORMAT_ELF) {
AssimilateElf(p, size);
} else if (g_format == FORMAT_MACHO) {
AssimilateMacho(p, size);
}
if (munmap(p, size)) DieSys(path);
if (close(fd)) DieSys(path);
}
int main(int argc, char *argv[]) {
int i;
prog = argv[0];
if (!prog) prog = "assimilate";
GetOpts(argc, argv);
if (optind == argc) {
kprintf("error: need at least one program path to assimilate\n");
write(2, USAGE, sizeof(USAGE) - 1);
exit(64);
}
for (i = optind; i < argc; ++i) {
prog = argv[i];
for (int i = optind; i < argc; ++i) {
path = argv[i];
Assimilate();
}
return exitcode;
}