Introduce new linker for fat ape binaries

This commit is contained in:
Justine Tunney 2023-08-11 04:37:23 -07:00
parent e3c456d23a
commit 0105e3e2b6
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
44 changed files with 3140 additions and 867 deletions

2164
tool/build/apelink.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -20,13 +20,16 @@
#include "libc/calls/calls.h"
#include "libc/calls/struct/stat.h"
#include "libc/dce.h"
#include "libc/elf/def.h"
#include "libc/elf/struct/ehdr.h"
#include "libc/fmt/conv.h"
#include "libc/intrin/bits.h"
#include "libc/intrin/kprintf.h"
#include "libc/log/check.h"
#include "libc/limits.h"
#include "libc/macho.internal.h"
#include "libc/macros.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/stdckdint.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/msync.h"
@ -35,85 +38,129 @@
#include "third_party/getopt/getopt.internal.h"
#include "third_party/regex/regex.h"
__static_yoink("strerror_wr");
#define VERSION \
"actually portable executable assimilate v1.6\n" \
"copyright 2023 justine alexandra roberts tunney\n"
// options used: fhem
// letters not used: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdgijklnopqrstuvwxyz
// digits not used: 0123456789
// puncts not used: !"#$%&'()*+,-./;<=>@[\]^_`{|}~
// letters duplicated: none
#define GETOPTS "fem"
#define USAGE \
"usage: assimilate.com [-hvfem] COMFILE...\n" \
" -h show help\n" \
" -v show version\n" \
" -f ignore soft errors\n" \
" -e convert to elf regardless of host os\n" \
" -m convert to macho regardless of host os\n" \
" -x convert to amd64 regardless of host cpu\n" \
" -a convert to arm64 regardless of host cpu\n"
#define USAGE \
"\
Usage: assimilate.com [-hfem] COMFILE...\n\
-h show help\n\
-e force elf\n\
-m force macho\n\
-f continue on soft errors\n\
\n\
αcτµαlly pδrταblε εxεcµταblε assimilate v1.o\n\
copyright 2022 justine alexandra roberts tunney\n\
https://twitter.com/justinetunney\n\
https://linkedin.com/in/jtunney\n\
https://justine.lol/ape.html\n\
https://github.com/jart\n\
\n\
This program converts Actually Portable Executable files so they're\n\
in the platform-local executable format, rather than the multiplatform\n\
APE shell script format. This is useful on UNIX operating systems when\n\
you want to use your APE programs as script interpreter or for setuid.\n\
"
#define ARCH_NATIVE 0
#define ARCH_AMD64 1
#define ARCH_ARM64 2
#define MODE_NATIVE 0
#define MODE_ELF 1
#define MODE_MACHO 2
#define MODE_PE 3
#define FORMAT_NATIVE 0
#define FORMAT_ELF 1
#define FORMAT_MACHO 2
#define FORMAT_PE 3
int g_mode;
bool g_force;
int exitcode;
const char *prog;
char errstr[128];
static int g_arch;
static int g_format;
static bool g_force;
static const char *prog;
static const char *path;
static char errstr[128];
static bool got_format_flag;
void GetOpts(int argc, char *argv[]) {
// Prints "THING: REASON" to standard error and exits with status 1.
//
// When no output format flag was given (got_format_flag is false), a
// parenthetical is appended to the message naming the executable
// format that's assumed for the host operating system.
static wontreturn void Die(const char *thing, const char *reason) {
  const char *hint = "";
  if (!got_format_flag) {
    if (IsXnu()) {
      hint = " (the host os uses macho natively)";
    } else if (IsLinux() || IsFreebsd() || IsNetbsd() || IsOpenbsd()) {
      hint = " (the host os uses elf natively)";
    } else {
      hint = " (the host os uses pe natively)";
    }
  }
  tinyprint(2, thing, ": ", reason, hint, "\n", NULL);
  exit(1);
}
// Reports a failed system call and exits with status 1.
//
// The message is produced by perror(), i.e. `thing` followed by the
// description of whatever errno currently holds.
static wontreturn void DieSys(const char *thing) {
perror(thing);
exit(1);
}
// Parses a decimal integer argument of the ape macho dd command.
//
// This uses strtol() rather than atoi() because the C standard leaves
// the behavior of atoi() undefined when the number is out of range,
// which means comparing its result with INT_MAX isn't a dependable
// overflow check. strtol() on the other hand is specified to saturate
// at LONG_MAX / LONG_MIN, so anything that won't fit in an int can be
// detected. As before, a value equal to INT_MAX counts as an overflow.
//
// @param s is text that begins with the number; parsing stops at the
//     first character that isn't part of it
// @return the parsed value, which is always less than INT_MAX
// @note calls Die() and never returns if the number is too large
static int Atoi(const char *s) {
  long x = strtol(s, NULL, 10);
  if (x >= INT_MAX || x < INT_MIN) {
    Die(path, "integer overflow parsing ape macho dd argument");
  }
  return (int)x;
}
static void GetOpts(int argc, char *argv[]) {
int opt;
while ((opt = getopt(argc, argv, GETOPTS)) != -1) {
while ((opt = getopt(argc, argv, "hvfemxa")) != -1) {
switch (opt) {
case 'f':
g_force = true;
break;
case 'e':
g_mode = MODE_ELF;
g_format = FORMAT_ELF;
got_format_flag = true;
break;
case 'm':
g_mode = MODE_MACHO;
g_format = FORMAT_MACHO;
got_format_flag = true;
break;
case 'x':
g_arch = ARCH_AMD64;
break;
case 'a':
g_arch = ARCH_ARM64;
break;
case 'v':
tinyprint(1, VERSION, NULL);
exit(0);
case 'h':
write(1, USAGE, sizeof(USAGE) - 1);
tinyprint(1, VERSION, USAGE, NULL);
exit(0);
default:
write(2, USAGE, sizeof(USAGE) - 1);
exit(64);
tinyprint(2, VERSION, USAGE, NULL);
exit(1);
}
}
if (g_mode == MODE_NATIVE) {
if (optind == argc) {
Die(prog, "missing operand");
}
if (g_format == FORMAT_NATIVE) {
if (IsXnu()) {
g_mode = MODE_MACHO;
g_format = FORMAT_MACHO;
} else if (IsLinux() || IsFreebsd() || IsNetbsd() || IsOpenbsd()) {
g_mode = MODE_ELF;
g_format = FORMAT_ELF;
} else {
g_mode = MODE_PE;
g_format = FORMAT_PE;
}
}
if (g_arch == ARCH_NATIVE) {
#ifdef __aarch64__
g_arch = ARCH_ARM64;
#else
g_arch = ARCH_AMD64;
#endif
}
if (g_format == FORMAT_PE && g_arch == ARCH_ARM64) {
Die(prog, "native arm64 on windows not supported yet");
}
}
void GetElfHeader(char ehdr[hasatleast 64], const char *image, size_t n) {
char *p;
static void GetElfHeader(char ehdr[hasatleast 64], const char *image,
size_t n) {
int c, i;
for (p = image; p < image + MIN(n, 4096); ++p) {
char *p, *e;
for (p = image, e = p + MIN(n, 8192); p < e; ++p) {
TryAgain:
if (READ64LE(p) != READ64LE("printf '")) continue;
for (i = 0, p += 8; p + 3 < image + MIN(n, 4096) && (c = *p++) != '\'';) {
for (i = 0, p += 8; p + 3 < e && (c = *p++) != '\'';) {
if (c == '\\') {
if ('0' <= *p && *p <= '7') {
c = *p++ - '0';
@ -130,29 +177,35 @@ void GetElfHeader(char ehdr[hasatleast 64], const char *image, size_t n) {
if (i < 64) {
ehdr[i++] = c;
} else {
kprintf("%s: ape printf elf header too long\n", prog);
exit(1);
goto TryAgain;
}
}
if (i != 64) {
kprintf("%s: ape printf elf header too short\n", prog);
exit(2);
}
if (READ32LE(ehdr) != READ32LE("\177ELF")) {
kprintf("%s: ape printf elf header didn't have elf magic\n", prog);
exit(3);
if (i != 64 || //
READ32LE(ehdr) != READ32LE("\177ELF") || //
ehdr[EI_CLASS] == ELFCLASS32 || //
READ16LE(ehdr + 18) !=
(g_arch == ARCH_AMD64 ? EM_NEXGEN32E : EM_AARCH64)) {
goto TryAgain;
}
return;
}
kprintf("%s: printf statement not found in first 4096 bytes\n", prog);
exit(4);
switch (g_arch) {
case ARCH_AMD64:
Die(path, "printf statement not found in first 8192 bytes of image "
"containing elf64 ehdr for amd64");
case ARCH_ARM64:
Die(path, "printf statement not found in first 8192 bytes of image "
"containing elf64 ehdr for arm64");
default:
__builtin_unreachable();
}
}
void GetMachoPayload(const char *image, size_t imagesize, int *out_offset,
int *out_size) {
static void GetMachoPayload(const char *image, size_t imagesize,
int *out_offset, int *out_size) {
regex_t rx;
const char *script;
regmatch_t rm[1 + 3] = {0};
regmatch_t rm[1 + 13] = {0};
int rc, skip, count, bs, offset, size;
if ((script = memmem(image, imagesize, "'\n#'\"\n", 6))) {
@ -160,147 +213,239 @@ void GetMachoPayload(const char *image, size_t imagesize, int *out_offset,
} else if ((script = memmem(image, imagesize, "#'\"\n", 4))) {
script += 4;
} else {
kprintf("%s: ape shell script not found\n", prog);
exit(5);
Die(path, "ape shell script not found");
}
DCHECK_EQ(REG_OK, regcomp(&rx,
"bs=\"?\\$?(*([ [:digit:]]+))*\"? "
"skip=\"?\\$?(*([ [:digit:]]+))*\"? "
"count=\"?\\$?(*([ [:digit:]]+))*\"?",
REG_EXTENDED));
rc = regexec(&rx, script, 4, rm, 0);
// the ape shell script has always historically used `dd` to
// assimilate binaries to the mach-o file format but we have
// formatted the arguments in a variety of different ways eg
//
// - `arg=" 9293"` is how we originally had ape do it
// - `arg=$(( 9293))` b/c busybox sh disliked quoted space
// - `arg=9293 ` is generated by modern apelink.com program
//
unassert(regcomp(&rx,
"bs=" // dd block size arg
"(['\"] *)?" // #1 optional quote w/ space
"(\\$\\(\\( *)?" // #2 optional math w/ space
"([[:digit:]]+)" // #3
"( *\\)\\))?" // #4 optional math w/ space
"( *['\"])?" // #5 optional quote w/ space
" +" //
"skip=" // dd skip arg
"(['\"] *)?" // #6 optional quote w/ space
"(\\$\\(\\( *)?" // #7 optional math w/ space
"([[:digit:]]+)" // #8
"( *\\)\\))?" // #9 optional math w/ space
"( *['\"])?" // #10 optional quote w/ space
" +" //
"count=" // dd count arg
"(['\"] *)?" // #11 optional quote w/ space
"(\\$\\(\\( *)?" // #12 optional math w/ space
"([[:digit:]]+)", // #13
REG_EXTENDED) == REG_OK);
int i = 0;
TryAgain:
rc = regexec(&rx, script + i, 1 + 13, rm, 0);
if (rc != REG_OK) {
if (rc == REG_NOMATCH) {
kprintf("%s: ape macho dd command not found\n", prog);
exit(6);
unassert(rc == REG_NOMATCH);
switch (g_arch) {
case ARCH_AMD64:
Die(path, "ape macho dd command for amd64 not found");
case ARCH_ARM64:
Die(path, "ape macho dd command for arm64 not found");
default:
__builtin_unreachable();
}
regerror(rc, &rx, errstr, sizeof(errstr));
kprintf("%s: ape macho dd regex failed: %s\n", prog, errstr);
exit(7);
}
bs = atoi(script + rm[1].rm_so);
skip = atoi(script + rm[2].rm_so);
count = atoi(script + rm[3].rm_so);
if (ckd_mul(&offset, skip, bs) || ckd_mul(&size, count, bs)) {
kprintf("%s: integer overflow parsing macho\n");
exit(8);
i += rm[13].rm_eo;
bs = Atoi(script + rm[3].rm_so);
skip = Atoi(script + rm[8].rm_so);
count = Atoi(script + rm[13].rm_so);
if (ckd_mul(&offset, skip, bs)) {
Die(path, "integer overflow computing ape macho dd offset");
}
if (ckd_mul(&size, count, bs)) {
Die(path, "integer overflow computing ape macho dd size");
}
if (offset < 64) {
kprintf("%s: ape macho dd offset should be ≥64: %d\n", prog, offset);
exit(9);
Die(path, "ape macho dd offset must be at least 64");
}
if (offset >= imagesize) {
kprintf("%s: ape macho dd offset is outside file: %d\n", prog, offset);
exit(10);
Die(path, "ape macho dd offset points outside image");
}
if (size < 32) {
kprintf("%s: ape macho dd size should be ≥32: %d\n", prog, size);
exit(11);
Die(path, "ape macho dd size must be at least 32");
}
if (size > imagesize - offset) {
kprintf("%s: ape macho dd size is outside file: %d\n", prog, size);
exit(12);
Die(path, "ape macho dd size overlaps end of image");
exit(1);
}
if (READ32LE(image + offset) != 0xFEEDFACE + 1 ||
READ32LE(image + offset + 4) !=
(g_arch == ARCH_AMD64 ? MAC_CPU_NEXGEN32E : MAC_CPU_ARM64)) {
goto TryAgain;
}
*out_offset = offset;
*out_size = size;
regfree(&rx);
}
void AssimilateElf(char *p, size_t n) {
// Turns a mapped ape image into an elf executable in place.
//
// The 64-byte elf header obtained from GetElfHeader() is copied over
// the first 64 bytes of the image, after which the leading 4096 bytes
// of the mapping are synchronized with msync(MS_SYNC).
static void AssimilateElf(char *p, size_t n) {
  char header[64];
  GetElfHeader(header, p, n);
  memcpy(p, header, sizeof(header));
  msync(p, 4096, MS_SYNC);
}
void AssimilateMacho(char *p, size_t n) {
// Turns a mapped ape image into a mach-o executable in place.
//
// GetMachoPayload() reports where the mach-o payload is located inside
// the image and how big it is; those bytes are then moved to the very
// start of the image and the whole mapping is synchronized.
static void AssimilateMacho(char *p, size_t n) {
  int payload_offset, payload_size;
  GetMachoPayload(p, n, &payload_offset, &payload_size);
  // memmove is needed since source and destination may overlap
  memmove(p, p + payload_offset, payload_size);
  msync(p, n, MS_SYNC);
}
void Assimilate(void) {
static void Assimilate(void) {
int fd;
char *p;
struct stat st;
if ((fd = open(prog, O_RDWR)) == -1) {
kprintf("%s: open(O_RDWR) failed: %m\n", prog);
exit(13);
}
if (fstat(fd, &st) == -1) {
kprintf("%s: fstat() failed: %m\n", prog);
exit(14);
}
if (st.st_size < 64) {
kprintf("%s: ape binaries must be at least 64 bytes\n", prog);
exit(15);
}
if ((p = mmap(0, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)) ==
MAP_FAILED) {
kprintf("%s: mmap failed: %m\n", prog);
exit(16);
}
if (g_mode == MODE_PE) {
if (READ16LE(p) == READ16LE("MZ")) {
if (!g_force) {
kprintf("%s: program is already an elf binary\n", prog);
if (g_mode != MODE_ELF) {
exitcode = 1;
}
}
goto Finish;
} else {
kprintf("%s: currently cannot back-convert to pe\n", prog);
exit(17);
}
}
ssize_t size;
if ((fd = open(path, O_RDWR)) == -1) DieSys(path);
if ((size = lseek(fd, 0, SEEK_END)) == -1) DieSys(path);
if (size < 64) Die(path, "ape executables must be at least 64 bytes");
p = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
if (p == MAP_FAILED) DieSys(path);
if (READ32LE(p) == READ32LE("\177ELF")) {
if (!g_force) {
kprintf("%s: program is already an elf binary\n", prog);
if (g_mode != MODE_ELF) {
exitcode = 1;
}
Elf64_Ehdr *ehdr;
switch (g_format) {
case FORMAT_ELF:
ehdr = (Elf64_Ehdr *)p;
if (ehdr->e_ident[EI_CLASS] == ELFCLASS32) {
Die(path, "32-bit elf not supported");
}
switch (g_arch) {
case ARCH_AMD64:
switch (ehdr->e_machine) {
case EM_NEXGEN32E:
if (g_force) {
exit(0);
} else {
Die(path, "already an elf amd64 executable");
}
case EM_AARCH64:
Die(path, "can't assimilate elf arm64 to elf amd64");
default:
Die(path, "elf has unsupported architecture");
}
case ARCH_ARM64:
switch (ehdr->e_machine) {
case EM_AARCH64:
if (g_force) {
exit(0);
} else {
Die(path, "already an elf arm64 executable");
}
case EM_NEXGEN32E:
Die(path, "can't assimilate elf amd64 to elf arm64");
default:
Die(path, "elf has unsupported architecture");
}
default:
__builtin_unreachable();
}
case FORMAT_MACHO:
Die(path, "can't assimilate elf to macho");
case FORMAT_PE:
Die(path, "can't assimilate elf to pe (try elf2pe.com)");
default:
__builtin_unreachable();
}
goto Finish;
}
if (READ32LE(p) == 0xFEEDFACE + 1) {
if (!g_force) {
kprintf("%s: program is already a mach-o binary\n", prog);
if (g_mode != MODE_MACHO) {
exitcode = 1;
}
struct MachoHeader *macho;
switch (g_format) {
case FORMAT_MACHO:
macho = (struct MachoHeader *)p;
switch (g_arch) {
case ARCH_AMD64:
switch (macho->arch) {
case MAC_CPU_NEXGEN32E:
if (g_force) {
exit(0);
} else {
Die(path, "already a macho amd64 executable");
}
case MAC_CPU_ARM64:
Die(path, "can't assimilate macho arm64 to macho amd64");
default:
Die(path, "macho has unsupported architecture");
}
case ARCH_ARM64:
switch (macho->arch) {
case MAC_CPU_ARM64:
if (g_force) {
exit(0);
} else {
Die(path, "already a macho arm64 executable");
}
case MAC_CPU_NEXGEN32E:
Die(path, "can't assimilate macho amd64 to macho arm64");
default:
Die(path, "macho has unsupported architecture");
}
default:
__builtin_unreachable();
}
case FORMAT_ELF:
Die(path, "can't assimilate macho to elf");
case FORMAT_PE:
Die(path, "can't assimilate macho to pe");
default:
__builtin_unreachable();
}
goto Finish;
}
if (READ64LE(p) != READ64LE("MZqFpD='")) {
kprintf("%s: this file is not an actually portable executable\n", prog);
exit(17);
if (READ64LE(p) != READ64LE("MZqFpD='") && //
READ64LE(p) != READ64LE("jartsr='") && //
READ64LE(p) != READ64LE("APEDBG='")) {
Die(path, "not an actually portable executable");
}
if (g_mode == MODE_ELF) {
AssimilateElf(p, st.st_size);
} else if (g_mode == MODE_MACHO) {
AssimilateMacho(p, st.st_size);
if (g_format == FORMAT_PE) {
if (READ16LE(p) == READ16LE("MZ")) {
if (g_force) {
exit(0);
} else {
Die(path, "this ape file is already a pe file");
}
} else {
Die(path, "this ape file was built without pe support");
}
}
Finish:
if (munmap(p, st.st_size) == -1) {
kprintf("%s: munmap() failed: %m\n", prog);
exit(18);
if (g_format == FORMAT_ELF) {
AssimilateElf(p, size);
} else if (g_format == FORMAT_MACHO) {
AssimilateMacho(p, size);
}
if (munmap(p, size)) DieSys(path);
if (close(fd)) DieSys(path);
}
int main(int argc, char *argv[]) {
int i;
prog = argv[0];
if (!prog) prog = "assimilate";
GetOpts(argc, argv);
if (optind == argc) {
kprintf("error: need at least one program path to assimilate\n");
write(2, USAGE, sizeof(USAGE) - 1);
exit(64);
}
for (i = optind; i < argc; ++i) {
prog = argv[i];
for (int i = optind; i < argc; ++i) {
path = argv[i];
Assimilate();
}
return exitcode;
}

View file

@ -47,8 +47,8 @@
#include "libc/sysv/consts/prot.h"
#include "third_party/getopt/getopt.internal.h"
// see tool/hello/hello.c for an example program this can link
// make -j8 m=tiny o/tiny/tool/hello/hello.com
// see tool/hello/hello-pe.c for an example program this can link
// make -j8 m=tiny o/tiny/tool/hello/hello-pe.com
#define VERSION \
"elf2pe v0.1\n" \
@ -571,6 +571,7 @@ static struct Section *LoadSection(struct Elf *elf, int index,
static void LoadSectionsIntoSegments(struct Elf *elf) {
int i;
Elf64_Shdr *shdr;
bool hasdataseg = false;
struct Segment *segment = 0;
for (i = 0; i < elf->ehdr->e_shnum; ++i) {
if ((shdr = GetElfSectionHeaderAddress(elf->ehdr, elf->size, i)) &&
@ -590,6 +591,7 @@ static void LoadSectionsIntoSegments(struct Elf *elf) {
segment->vaddr_min = section->shdr->sh_addr;
if (shdr->sh_type == SHT_PROGBITS)
segment->offset_min = section->shdr->sh_offset;
hasdataseg |= segment->prot == (PROT_READ | PROT_WRITE);
}
segment->hasnobits |= shdr->sh_type == SHT_NOBITS;
segment->hasprogbits |= shdr->sh_type == SHT_PROGBITS;
@ -604,6 +606,16 @@ static void LoadSectionsIntoSegments(struct Elf *elf) {
if (segment) {
dll_make_last(&elf->segments, &segment->elem);
}
if (elf->imports && !hasdataseg) {
// if the program we're linking is really tiny and it doesn't have
// either a .data or .bss section but it does import function from
// libraries, then create a synthetic .data segment for the pe iat
segment = NewSegment();
segment->align = 8;
segment->hasprogbits = true;
segment->prot = PROT_READ | PROT_WRITE;
dll_make_last(&elf->segments, &segment->elem);
}
}
static bool ParseDllImportSymbol(const char *symbol_name,
@ -678,8 +690,8 @@ static struct Elf *OpenElf(const char *path) {
if (!elf->strtab) Die(path, "elf doesn't have string table");
elf->secstrs = GetElfSectionNameStringTable(elf->ehdr, elf->size);
if (!elf->strtab) Die(path, "elf doesn't have section string table");
LoadSectionsIntoSegments(elf);
LoadDllImports(elf);
LoadSectionsIntoSegments(elf);
close(fd);
return elf;
}
@ -745,11 +757,20 @@ static void PickPeSectionName(char *p, struct Elf *elf,
static uint32_t GetPeSectionCharacteristics(struct Segment *s) {
uint32_t x = 0;
if (s->prot & PROT_EXEC) x |= kNtPeSectionCntCode | kNtPeSectionMemExecute;
if (s->prot & PROT_READ) x |= kNtPeSectionMemRead;
if (s->prot & PROT_WRITE) x |= kNtPeSectionMemWrite;
if (s->hasnobits) x |= kNtPeSectionCntUninitializedData;
if (s->hasprogbits) x |= kNtPeSectionCntInitializedData;
if (s->prot & PROT_EXEC) {
x |= kNtPeSectionCntCode | kNtPeSectionMemExecute;
} else if (s->hasprogbits) {
x |= kNtPeSectionCntInitializedData;
}
if (s->prot & PROT_READ) {
x |= kNtPeSectionMemRead;
}
if (s->prot & PROT_WRITE) {
x |= kNtPeSectionMemWrite;
}
if (s->hasnobits) {
x |= kNtPeSectionCntUninitializedData;
}
return x;
}
@ -780,9 +801,6 @@ static struct ImagePointer GeneratePe(struct Elf *elf, char *fp, int64_t vp) {
mzhdr = (struct NtImageDosHeader *)fp;
fp += sizeof(struct NtImageDosHeader);
memcpy(mzhdr, "MZ", 2);
/* memcpy(mzhdr, "MZqFpD='\n\n", 10); */
/* mzhdr->e_oemid = 'J' | 'T' << 8; */
/* memcpy(mzhdr->e_res2, "' <<'@'\n", 8); */
// embed the ms-dos stub and/or bios bootloader
if (stubpath) {
@ -797,10 +815,6 @@ static struct ImagePointer GeneratePe(struct Elf *elf, char *fp, int64_t vp) {
if (close(fd)) DieSys(stubpath);
}
// begin the shell script
/* fp = stpcpy(fp, "\n@\n" */
/* "#'\"\n"); */
// output portable executable magic
fp = ALIGN_FILE(fp, 8);
mzhdr->e_lfanew = fp - (char *)mzhdr;
@ -956,6 +970,7 @@ static struct ImagePointer GeneratePe(struct Elf *elf, char *fp, int64_t vp) {
struct Library *library = LIBRARY_CONTAINER(e);
library->idt->ImportAddressTable = vp - opthdr->ImageBase;
fp = mempcpy(fp, library->ilt, library->iltbytes);
segment->hasprogbits = true;
for (struct Dll *g = dll_first(library->funcs); g;
g = dll_next(library->funcs, g)) {
struct Func *func = FUNC_CONTAINER(g);
@ -1055,19 +1070,17 @@ int main(int argc, char *argv[]) {
#ifndef NDEBUG
ShowCrashReports();
#endif
// get program name
prog = argv[0];
if (!prog) prog = "elf2pe";
// process flags
GetOpts(argc, argv);
// translate executable
struct Elf *elf = OpenElf(argv[optind]);
char *buf = memalign(MAX_ALIGN, INT_MAX);
char *buf = memalign(MAX_ALIGN, 134217728);
struct ImagePointer ip = GeneratePe(elf, buf, 0x00400000);
if (creat(outpath, 0755) == -1) DieSys(elf->path);
Pwrite(3, buf, ip.fp - buf, 0);
if (close(3)) DieSys(elf->path);
// PrintElf(elf);
}

8
tool/build/elf2pe.h Normal file
View file

@ -0,0 +1,8 @@
#ifndef COSMOPOLITAN_TOOL_BUILD_ELF2PE_H_
#define COSMOPOLITAN_TOOL_BUILD_ELF2PE_H_
// Declares a function imported from a dll.
//
// This expands to an extern declaration of FUNC as a const pointer to
// a function that returns RET and accepts ARGS. The pointer carries
// the ms_abi and weak attributes, and its assembler name is set to
// dll$DLL$FUNC wrapped in literal double quote characters.
//
// NOTE(review): presumably elf2pe recognizes symbols that have this
// naming scheme when it loads dll imports; confirm against elf2pe.c
//
// @param DLL is a string literal, naming the library
// @param RET is the function's return type
// @param FUNC is the identifier of the function to import
// @param ARGS is the parenthesized parameter list
#define __dll_import(DLL, RET, FUNC, ARGS) \
extern RET(*const __attribute__((__ms_abi__, __weak__)) FUNC) \
ARGS __asm__("\"dll$" DLL "$" #FUNC "\"")
#endif /* COSMOPOLITAN_TOOL_BUILD_ELF2PE_H_ */

10
tool/build/lib/lib.h Normal file
View file

@ -0,0 +1,10 @@
#ifndef COSMOPOLITAN_TOOL_BUILD_LIB_LIB_H_
#define COSMOPOLITAN_TOOL_BUILD_LIB_LIB_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
// Parses an operating system support vector.
//
// The first argument is either a number (any base strtol() detects)
// or a list of operating system names delimited by any of " ,+:/|".
// The list form is tokenized with strtok_r(), so the string passed in
// gets modified. The second argument receives the resulting bitset
// and is only written on success. False is returned when the string
// contains a name that isn't recognized.
bool ParseSupportVector(char *, int *);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_TOOL_BUILD_LIB_LIB_H_ */

View file

@ -0,0 +1,75 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2023 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/fmt/conv.h"
#include "libc/str/str.h"
#include "tool/build/lib/lib.h"
/**
 * Parses operating system support vector.
 *
 * The argument may be a number, in which case it's used as the bitset
 * verbatim. Otherwise it's treated as a list of operating system names
 * (optionally prefixed with `_HOST`, compared case-insensitively) whose
 * `_HOST*` bits get or'd together.
 *
 * @param str is the number or name list; the name list is tokenized in
 *     place using strtok_r(), so the string is modified
 * @param out_bits receives the bitset, and is only written on success
 * @return true on success, or false if a name wasn't recognized or a
 *     number was supplied that doesn't fit in an int
 */
bool ParseSupportVector(char *str, int *out_bits) {
  // you can supply a number, e.g. 123, 0x123, etc.
  char *endptr;
  long number = strtol(str, &endptr, 0);
  if (!*endptr) {
    // strtol() yields a long; refuse values that would otherwise get
    // silently truncated when they're stored as an int
    if (number != (int)number) {
      return false;
    }
    *out_bits = (int)number;
    return true;
  }
  // you can supply a string, e.g. -s linux+mac+bsd
  int bits = 0;
  char *tok, *state;
  const char *sep = " ,+:/|";
  while ((tok = strtok_r(str, sep, &state))) {
    if (_startswithi(tok, "_HOST")) {
      tok += 5;  // skip over the "_HOST" prefix
    }
    if (!strcasecmp(tok, "linux")) {
      bits |= _HOSTLINUX;
    } else if (!strcasecmp(tok, "metal")) {
      bits |= _HOSTMETAL;
    } else if (!strcasecmp(tok, "windows") ||  //
               !strcasecmp(tok, "win") ||      //
               !strcasecmp(tok, "nt") ||       //
               !strcasecmp(tok, "pe")) {
      bits |= _HOSTWINDOWS;
    } else if (!strcasecmp(tok, "xnu") ||    //
               !strcasecmp(tok, "mac") ||    //
               !strcasecmp(tok, "macos") ||  //
               !strcasecmp(tok, "macho") ||  //
               !strcasecmp(tok, "darwin")) {
      bits |= _HOSTXNU;
    } else if (!strcasecmp(tok, "freebsd")) {
      bits |= _HOSTFREEBSD;
    } else if (!strcasecmp(tok, "openbsd")) {
      bits |= _HOSTOPENBSD;
    } else if (!strcasecmp(tok, "netbsd")) {
      bits |= _HOSTNETBSD;
    } else if (!strcasecmp(tok, "bsd")) {
      bits |= _HOSTFREEBSD | _HOSTOPENBSD | _HOSTNETBSD;
    } else if (!strcasecmp(tok, "elf")) {
      bits |=
          _HOSTMETAL | _HOSTLINUX | _HOSTFREEBSD | _HOSTOPENBSD | _HOSTNETBSD;
    } else if (!strcasecmp(tok, "unix")) {
      bits |= _HOSTLINUX | _HOSTFREEBSD | _HOSTOPENBSD | _HOSTNETBSD | _HOSTXNU;
    } else {
      return false;
    }
    str = 0;  // subsequent strtok_r() calls continue from saved state
  }
  *out_bits = bits;
  return true;
}

View file

@ -18,6 +18,7 @@
*/
#include "libc/calls/calls.h"
#include "libc/intrin/bits.h"
#include "libc/intrin/kprintf.h"
#include "libc/limits.h"
#include "libc/nt/struct/imageimportbyname.internal.h"
#include "libc/nt/struct/imageimportdescriptor.internal.h"
@ -27,10 +28,29 @@
#include "libc/runtime/runtime.h"
#include "libc/stdckdint.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/prot.h"
/**
* @fileoverview Linter for PE static executable files.
*
* Generating PE files from scratch is tricky. There's numerous things
* that can go wrong, and operating systems don't explain what's wrong
* when they refuse to run a program. This program can help illuminate
* any issues with your generated binaries, with better error messages
*/
// Bundles up what the import checks need to know about an executable.
struct Exe {
// start of the executable file's content in memory
char *map;
// number of bytes in the file image
size_t size;
// name of the file, which is used to prefix error messages
const char *path;
// the pe headers, which reside inside `map`
struct NtImageNtHeaders *pe;
// the pe section table
struct NtImageSectionHeader *sections;
// number of entries in `sections` (FileHeader.NumberOfSections)
uint32_t section_count;
};
static wontreturn void Die(const char *thing, const char *reason) {
tinyprint(2, thing, ": ", reason, "\n", NULL);
exit(1);
@ -41,6 +61,68 @@ static wontreturn void DieSys(const char *thing) {
exit(1);
}
// Prints a table describing pe section headers.
//
// One line is printed per section, showing its name, file offset,
// relative virtual address, size in file, size in memory, and R/W/E
// letters for the read, write, and execute characteristics.
//
// The `%lx` conversions need an `unsigned long` argument. The section
// header fields are therefore cast explicitly, since passing a narrower
// integer (the pe format specifies these fields as 32-bit) to `%lx`
// would be undefined behavior.
//
// @param f is the stream to which the table is written
// @param p is the section header array
// @param n is the number of elements in `p`
static void LogPeSections(FILE *f, struct NtImageSectionHeader *p, size_t n) {
  size_t i;
  fprintf(f, "Name Offset RelativeVirtAddr FileSiz MemSiz Flg\n");
  for (i = 0; i < n; ++i) {
    fprintf(f, "%-8.8s 0x%06lx 0x%016lx 0x%06lx 0x%06lx %c%c%c\n", p[i].Name,
            (unsigned long)p[i].PointerToRawData,
            (unsigned long)p[i].VirtualAddress,
            (unsigned long)p[i].SizeOfRawData,
            (unsigned long)p[i].Misc.VirtualSize,
            p[i].Characteristics & kNtPeSectionMemRead ? 'R' : ' ',
            p[i].Characteristics & kNtPeSectionMemWrite ? 'W' : ' ',
            p[i].Characteristics & kNtPeSectionMemExecute ? 'E' : ' ');
  }
}
// resolves relative virtual address
//
// this is a trivial process when an executable has been loaded properly
// i.e. a separate mmap() call was made for each individual section; but
// we've only mapped the executable file itself into memory; thus, we'll
// need to remap a virtual address into a file offset to get the pointer
//
// returns pointer to image data, or null on error
static void *GetRva(struct Exe *exe, uint32_t rva, uint32_t size) {
int i;
for (i = 0; i < exe->section_count; ++i) {
if (exe->sections[i].VirtualAddress <= rva &&
rva < exe->sections[i].VirtualAddress +
exe->sections[i].Misc.VirtualSize) {
if (rva + size <=
exe->sections[i].VirtualAddress + exe->sections[i].Misc.VirtualSize) {
return exe->map + exe->sections[i].PointerToRawData +
(rva - exe->sections[i].VirtualAddress);
} else {
break;
}
}
}
return 0;
}
// Returns true if string contains an ascii control character.
//
// Only bytes in the 7-bit ascii range (0 through 127) are classified
// using iscntrl(); any other byte is never reported.
static bool HasControlCodes(const char *s) {
  for (; *s; ++s) {
    int ch = *s;
    if (0 <= ch && ch <= 127 && iscntrl(ch)) {
      return true;
    }
  }
  return false;
}
// Validates an IMAGE_IMPORT_BY_NAME (hint/name) entry.
//
// The entry must be 2-byte aligned, must resolve to a location within
// a pe section, and its function name must be a non-empty string that
// has no ascii control codes. Any violation is reported using Die(),
// which doesn't return.
//
// NOTE(review): GetRva() is only asked to vouch for the bytes of the
// struct itself, whereas the name is scanned until its nul terminator;
// a name lacking a terminator could be read past the validated range.
//
// @param exe is the executable being checked
// @param rva is the relative virtual address of the hint/name entry
static void CheckPeImportByName(struct Exe *exe, uint32_t rva) {
  struct NtImageImportByName *hintname;
  if (rva & 1)
    Die(exe->path, "PE IMAGE_IMPORT_BY_NAME (hint name) structures must "
                   "be 2-byte aligned");
  if (!(hintname = GetRva(exe, rva, sizeof(struct NtImageImportByName))))
    Die(exe->path, "PE import table RVA entry didn't resolve");
  if (!*hintname->Name)
    Die(exe->path, "PE imported function name is empty string");
  if (HasControlCodes(hintname->Name))
    Die(exe->path, "PE imported function name contains ascii control codes");
}
static void CheckPe(const char *path, char *map, size_t size) {
int pagesz = 4096;
@ -53,6 +135,8 @@ static void CheckPe(const char *path, char *map, size_t size) {
uint32_t pe_offset;
if ((pe_offset = READ32LE(map + 60)) >= size)
Die(path, "PE header offset points past end of image");
if (pe_offset & 7)
Die(path, "PE header offset must possess an 8-byte alignment");
if (pe_offset + sizeof(struct NtImageNtHeaders) > size)
Die(path, "PE mandatory headers overlap end of image");
struct NtImageNtHeaders *pe = (struct NtImageNtHeaders *)(map + pe_offset);
@ -93,12 +177,14 @@ static void CheckPe(const char *path, char *map, size_t size) {
Die(path, "PE FileHeader.Characteristics needs "
"IMAGE_FILE_LARGE_ADDRESS_AWARE if ImageBase > INT_MAX");
// fixup pe header
// validate the size of the pe optional headers
int len;
if (ckd_mul(&len, pe->OptionalHeader.NumberOfRvaAndSizes, 8) ||
ckd_add(&len, len, sizeof(struct NtImageOptionalHeader)) ||
pe->FileHeader.SizeOfOptionalHeader < len)
Die(path, "PE SizeOfOptionalHeader too small");
if (ckd_mul(&len, pe->OptionalHeader.NumberOfRvaAndSizes,
sizeof(struct NtImageDataDirectory)) ||
ckd_add(&len, len, sizeof(struct NtImageOptionalHeader)))
Die(path, "encountered overflow computing PE SizeOfOptionalHeader");
if (pe->FileHeader.SizeOfOptionalHeader != len)
Die(path, "PE SizeOfOptionalHeader had incorrect value");
if (len > size || (char *)&pe->OptionalHeader + len > map + size)
Die(path, "PE OptionalHeader overflows image");
@ -167,34 +253,60 @@ static void CheckPe(const char *path, char *map, size_t size) {
}
}
#if 0 // broken
// create an object for our portable executable
struct Exe exe[1] = {{
.pe = pe,
.path = path,
.map = map,
.size = size,
.sections = sections,
.section_count = pe->FileHeader.NumberOfSections,
}};
// validate dll imports
if (pe->OptionalHeader.NumberOfRvaAndSizes >= 2 &&
pe->OptionalHeader.DataDirectory[kNtImageDirectoryEntryImport]
.VirtualAddress) {
struct NtImageImportDescriptor *idt =
(struct NtImageImportDescriptor
*)(map +
pe->OptionalHeader.DataDirectory[kNtImageDirectoryEntryImport]
.VirtualAddress);
for (int i = 0;; ++i) {
if ((char *)(idt + i + sizeof(*idt)) > map + size)
Die(path, "PE IMAGE_DIRECTORY_ENTRY_IMPORT points outside image");
if (!idt[i].ImportLookupTable) break;
uint64_t *ilt = (uint64_t *)(map + idt[i].ImportLookupTable);
for (int j = 0;; ++j) {
if ((char *)(ilt + j + sizeof(*ilt)) > map + size)
Die(path, "PE ImportLookupTable points outside image");
if (!ilt[j]) break;
struct NtImageImportByName *func =
(struct NtImageImportByName *)(map + ilt[j]);
struct NtImageDataDirectory *ddImports =
exe->pe->OptionalHeader.DataDirectory + kNtImageDirectoryEntryImport;
if (exe->pe->OptionalHeader.NumberOfRvaAndSizes >= 2 && ddImports->Size) {
if (ddImports->Size % sizeof(struct NtImageImportDescriptor) != 0)
Die(exe->path, "PE Imports data directory entry Size should be a "
"multiple of sizeof(IMAGE_IMPORT_DESCRIPTOR)");
if (ddImports->VirtualAddress & 3)
Die(exe->path, "PE IMAGE_IMPORT_DESCRIPTOR table must be 4-byte aligned");
struct NtImageImportDescriptor *idt;
if (!(idt = GetRva(exe, ddImports->VirtualAddress, ddImports->Size)))
Die(exe->path, "couldn't resolve VirtualAddress/Size RVA of PE Import "
"Directory Table to within a defined PE section");
if (idt->ImportLookupTable >= exe->size)
Die(exe->path, "Import Directory Table VirtualAddress/Size RVA resolved "
"to dense unrelated binary content");
for (int i = 0; idt->ImportLookupTable; ++i, ++idt) {
char *dllname;
if (!(dllname = GetRva(exe, idt->DllNameRva, 2)))
Die(exe->path, "PE DllNameRva doesn't resolve to a PE section");
if (!*dllname)
Die(exe->path, "PE import DllNameRva pointed to empty string");
if (HasControlCodes(dllname))
Die(exe->path, "PE import DllNameRva contained ascii control codes");
if (idt->ImportLookupTable & 7)
Die(exe->path, "PE ImportLookupTable must be 8-byte aligned");
if (idt->ImportAddressTable & 7)
Die(exe->path, "PE ImportAddressTable must be 8-byte aligned");
uint64_t *ilt, *iat;
if (!(ilt = GetRva(exe, idt->ImportLookupTable, 8)))
Die(exe->path, "PE ImportLookupTable RVA didn't resolve to a section");
if (!(iat = GetRva(exe, idt->ImportAddressTable, 8)))
Die(exe->path, "PE ImportAddressTable RVA didn't resolve to a section");
for (int j = 0;; ++j, ++ilt, ++iat) {
if (*ilt != *iat) {
kprintf("i=%d j=%d ilt=%#x iat=%#x\n", i, j, *ilt, *iat);
Die(exe->path, "PE ImportLookupTable and ImportAddressTable should "
"have identical content");
}
if (!*ilt) break;
CheckPeImportByName(exe, *ilt);
}
uint64_t *iat = (uint64_t *)(map + idt[i].ImportAddressTable);
if ((char *)(iat + sizeof(*iat)) > map + size)
Die(path, "PE ImportAddressTable points outside image");
}
}
#endif
}
int main(int argc, char *argv[]) {
@ -202,6 +314,9 @@ int main(int argc, char *argv[]) {
void *map;
ssize_t size;
const char *path;
#ifndef NDEBUG
ShowCrashReports();
#endif
for (i = 1; i < argc; ++i) {
path = argv[i];
if ((fd = open(path, O_RDONLY)) == -1) DieSys(path);

View file

@ -483,8 +483,8 @@ void HandleClient(void) {
goto TerminateJob;
}
if (received > 0) {
WARNF("%s client sent %d unexpected bytes so killing job", exename,
received);
WARNF("%s client sent %d bytes unexpected bytes so killing job",
exename, received);
goto TerminateJob;
}
if (received != MBEDTLS_ERR_SSL_WANT_READ) {