cosmopolitan/tool/build/ar.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

552 lines
18 KiB
C
Raw Normal View History

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2023 Justine Alexandra Roberts Tunney

Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/ar.h"
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/iovec.h"
#include "libc/calls/struct/stat.h"
#include "libc/elf/def.h"
#include "libc/elf/elf.h"
#include "libc/elf/scalar.h"
#include "libc/elf/struct/sym.h"
#include "libc/errno.h"
#include "libc/fmt/itoa.h"
#include "libc/fmt/libgen.h"
#include "libc/fmt/magnumstrs.internal.h"
#include "libc/intrin/bsr.h"
#include "libc/limits.h"
#include "libc/macros.h"
#include "libc/runtime/runtime.h"
#include "libc/serialize.h"
#include "libc/stdckdint.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/s.h"
#include "tool/build/lib/ar.h"
#include "tool/build/lib/getargs.h"
/**
* @fileoverview cosmopolitan ar
*/
// prints the built-in manual and terminates the process
// - text is written to file descriptor `fd`
// - process exits with status `rc`, so this never returns
static wontreturn void ShowUsage(int rc, int fd) {
  tinyprint(  //
      fd,
      "USAGE\n"
      "\n",
      " ", program_invocation_name, " FLAGS ARCHIVE FILE...\n",
      "\n"
      "FLAGS\n"
      "\n"
      " rcs create new archive with index\n"
      " rcsD always deterministic\n"
      " --help show usage\n"
      " --version show program details\n"
      "\n"
      "ARGUMENTS\n"
      "\n"
      " ARCHIVE should be foo.a\n"
      " FILE should be foo.o, lib.a, or @args.txt\n"
      "\n"
      "DOCUMENTATION\n"
      "\n"
      " Your Cosmopolitan Archiver is superior:\n"
      "\n"
      " - Isn't accidentally quadratic like GNU ar. Cosmopolitan Libc is\n"
      " distributed as libcosmo.a which contains 5000+ object files and\n"
      " is tens of megabytes in size. GNU ar isn't capable of making an\n"
      " archive that large. So we invented this ar as a replacement.\n"
      "\n"
      " - Goes 2x faster than LLVM ar thanks to modern system calls like\n"
      " copy_file_range(). This ar should also use 100x less memory.\n"
      "\n"
      " - Can be built as a 96kb APE binary that works well on six OSes.\n"
      " Cosmopolitan uses the same dev tools on all OSes and archs to\n"
      " ensure compilations are simple and deterministic for everyone.\n"
      "\n"
      " This static archiver introduces handy features:\n"
      "\n"
      " - Arguments may be supplied in an `@args.txt` file. This is useful\n"
      " for overcoming the `ARG_MAX` limit, which is especially important\n"
      " on Windows, where only very few command arguments can be passed.\n"
      " GNU Make can be easily configured to generate args files.\n"
      "\n"
      " - You can merge many .a files into one big .a file. Args that end\n"
      " with .a will be opened as static archives. The .o files inside it\n"
      " will then be added to your new archive. It would be the same as if\n"
      " you passed all the .o files as args. This is fast. For example, to\n"
      " merge 37 .a files containing 5000 .o files takes ~38 milliseconds.\n"
      "\n"
      " - Directory arguments are ignored. The biggest gotcha with makefiles\n"
      " that use wildcard globbing is that it can't detect when files are\n"
      " deleted, which means it can't invalidate the artifacts which had\n"
      " depended on that file, leading to nondeterminism and surprising\n"
      " build failures. The simplest way to solve that is to add the\n"
      " directory to the prerequisites list, since the directory modified\n"
      " time will be updated by the OS when files inside it are deleted.\n"
      " When doing this, it's simple and elegant to not need to filter\n"
      " the directory prerequisites before passing `$^` to `ar`.\n"
      "\n",
      NULL);
  exit(rc);
}
// capacity in bytes (256 MiB) of the static arena that balloc() carves
// its allocations from; also used as the out-of-memory threshold
#define HEAP_SIZE (256L * 1024 * 1024)
// growable list of ints; appended to by AppendInt() which grows the
// storage through reballoc()
struct Ints {
int *p;  // element storage
size_t i;  // number of elements stored so far
};
// growable list of string pointers; appended to by AppendArg() which
// grows the storage through reballoc()
struct Args {
char **p;  // element storage
size_t i;  // number of pointers stored so far
};
// growable byte buffer; appended to by AppendBytes() which grows the
// storage through reballoc()
struct Bytes {
char *p;  // buffer storage
size_t i;  // number of bytes stored so far
};
// sorts the first 𝑛 chars of 𝐴 in place into ascending order
// - insertion sort, which is fine for tiny inputs like a flags string
// - chars are compared using their plain `char` values
static void SortChars(char *A, long n) {
  for (long next = 1; next < n; ++next) {
    long key = A[next];
    long hole = next;
    // slide larger elements one slot right until key's position is found
    for (; hole > 0 && A[hole - 1] > key; --hole) {
      A[hole] = A[hole - 1];
    }
    A[hole] = key;
  }
}
// reports "𝑝𝑎𝑡ℎ: 𝑟𝑒𝑎𝑠𝑜𝑛" on file descriptor 2 and exits with status 1
static wontreturn void Die(const char *path, const char *reason) {
  const int kErrFd = 2;
  const int kFailure = 1;
  tinyprint(kErrFd, path, ": ", reason, "\n", NULL);
  exit(kFailure);
}
static wontreturn void SysDie(const char *path, const char *func) {
const char *errstr;
if (!(errstr = _strerdoc(errno)))
errstr = "Unknown error";
tinyprint(2, path, ": ", func, ": ", errstr, "\n", NULL);
exit(1);
}
// allocates 𝑛 bytes of memory aligned on 𝑎 from .bss
// - avoids binary bloat of mmap() and malloc()
// - dies if out of memory or overflow occurs
// - new memory is always zero-initialized
// - can't be resized; use reballoc api
// - can't be freed or reclaimed
//
// passing the bitwise complement of the byte count (which reads as a
// negative ssize_t) requests a resizable block; that's how reballoc()
// calls this function. a resizable block gets its capacity rounded up
// to a two power, and that capacity is recorded in a size_t stored
// immediately before the returned pointer
//
// @param n is byte count, or ~count to request a resizable block
// @param a is alignment in bytes, which must be a two power
// @return pointer to memory inside the static HEAP_SIZE byte array
static void *balloc(size_t n, size_t a) {
size_t c;
int resizable;
uintptr_t h, p;
// bump allocator state: bytes of heap[] consumed so far
static size_t used;
static char heap[HEAP_SIZE];
assert(a >= 1 && !(a & (a - 1)));
h = (uintptr_t)heap;
p = h + used;
if ((resizable = (ssize_t)n < 0)) {
// undo the complement to recover the requested byte count
n = ~n;
// leave room for the capacity word stored ahead of the block
p += sizeof(c);
}
// round p up to the next multiple of a
p += a - 1;
p &= -a;
if (n <= a) {
c = a;
} else if (!resizable) {
c = n;
} else {
// round n up to a two power so repeated growth is amortized
c = 2ull << (__builtin_clzll(n - 1) ^ (sizeof(long long) * CHAR_BIT - 1));
}
// c < a catches the shift above wrapping around to zero
if (c < a || c > HEAP_SIZE || p + c > h + HEAP_SIZE)
Die(program_invocation_name, "out of memory");
used = p - h + c;
if (resizable)
// memcpy is used since p - sizeof(c) needn't be size_t aligned
memcpy((char *)p - sizeof(c), &c, sizeof(c));
return (void *)p;
}
// resizes array to hold 𝑛 elements of 𝑧 bytes each, aligned on 𝑧
// - memory comes from the .bss arena managed by balloc()
// - pass null 𝑝 to create a fresh resizable array
// - 𝑧 must be a two power
// - a multiply overflow is turned into an out of memory death
// - returns 𝑝 itself whenever the recorded capacity already suffices
// - otherwise a bigger block is carved and the old capacity's worth of
//   bytes is copied over; the old block is never reclaimed
static void *reballoc(void *p, size_t n, size_t z) {
  size_t need, have;
  assert(n >= 0);
  assert(z >= 1 && !(z & (z - 1)));
  if (ckd_mul(&need, n, z)) {
    // guaranteed to be refused by balloc()
    need = HEAP_SIZE;
  }
  if (p == NULL) {
    return balloc(~need, z);
  }
  // capacity word lives just ahead of every resizable block
  memcpy(&have, (char *)p - sizeof(have), sizeof(have));
  assert(have >= z && have < HEAP_SIZE && !(have & (have - 1)));
  if (need > have) {
    void *bigger = balloc(~need, z);
    memcpy(bigger, p, have);
    return bigger;
  }
  return p;
}
// returns copy of nul-terminated string 𝑠 allocated with balloc()
static char *StrDup(const char *s) {
  size_t bytes = strlen(s) + 1;  // includes the nul terminator
  char *copy = balloc(bytes, 1);
  memcpy(copy, s, bytes);
  return copy;
}
// returns new balloc()'d string holding 𝑎 followed by 𝑏
static char *StrCat(const char *a, const char *b) {
  size_t alen = strlen(a);
  size_t blen = strlen(b);
  char *joined = balloc(alen + blen + 1, 1);
  memcpy(joined, a, alen);
  memcpy(joined + alen, b, blen + 1);  // brings along b's nul terminator
  return joined;
}
// pushes 𝑖 onto the end of int list 𝑙, growing it via reballoc()
// - capacity for one slot beyond the new element is always requested
static void AppendInt(struct Ints *l, int i) {
  size_t slot = l->i;
  l->p = reballoc(l->p, slot + 2, sizeof(int));
  l->p[slot] = i;
  l->i = slot + 1;
}
// pushes pointer 𝑠 onto the end of list 𝑙, growing it via reballoc()
// - only the pointer is stored; the string isn't copied
// - capacity for one slot beyond the new element is always requested
static void AppendArg(struct Args *l, char *s) {
  size_t slot = l->i;
  l->p = reballoc(l->p, slot + 2, sizeof(char *));
  l->p[slot] = s;
  l->i = slot + 1;
}
// appends 𝑛 bytes at 𝑠 to the end of buffer 𝑙, growing it via reballoc()
// - capacity for one byte beyond the new length is always requested
static void AppendBytes(struct Bytes *l, const char *s, size_t n) {
  size_t at = l->i;
  l->p = reballoc(l->p, at + n + 1, sizeof(char));
  memcpy(l->p + at, s, n);
  l->i = at + n;
}
// returns 1 if strings 𝑎 and 𝑏 have identical content, otherwise 0
static int IsEqual(const char *a, const char *b) {
  return strcmp(a, b) == 0;
}
// fills in archive member header 𝘩
// - every field starts out space padded
// - 𝑛𝑎𝑚𝑒 is copied without its nul terminator; dies if it can't fit
// - date, uid, and gid are each set to "0", and mode is written as the
//   octal digits of 𝑚𝑜𝑑𝑒's low nine bits; the "//" entry is the
//   exception, for which those four fields are left blank
// - 𝑠𝑖𝑧𝑒 is written in decimal; dies if it needs more than ten digits
static void MakeArHeader(struct ar_hdr *h,  //
                         const char *name,  //
                         int mode,          //
                         size_t size) {     //
  char digits[21];
  char *end;
  size_t namelen;
  memset(h, ' ', sizeof(*h));
  namelen = strlen(name);
  if (namelen > ARRAYLEN(h->ar_name)) {
    Die(program_invocation_name, "ar_name overflow");
  }
  memcpy(h->ar_name, name, namelen);
  if (!IsEqual(name, "//")) {
    h->ar_date[0] = '0';
    h->ar_uid[0] = '0';
    h->ar_gid[0] = '0';
    end = FormatOctal32(digits, mode & 0777, false);
    memcpy(h->ar_mode, digits, end - digits);
  }
  if (size > 9999999999) {
    Die(program_invocation_name, "ar_size overflow");
  }
  end = FormatUint64(digits, size);
  memcpy(h->ar_size, digits, end - digits);
  memcpy(h->ar_fmag, ARFMAG, sizeof(h->ar_fmag));
}
// copies exactly 𝑠𝑖𝑧𝑒 bytes from one file descriptor to another
// - seeks 𝑖𝑛𝑓𝑑 to 𝑜𝑓𝑓𝑠𝑒𝑡 beforehand when 𝑜𝑓𝑓𝑠𝑒𝑡 is nonzero
// - assumes signal handlers aren't in play
// - uses copy_file_range() if possible; once that fails with one of
//   the "unsupported" errnos it switches for good to read() + write()
// - returns nothing; dies if any operation fails, if the input ends
//   early, or if write() accepts fewer bytes than it was given
static void CopyFileOrDie(const char *inpath, int infd,    //
                          const char *outpath, int outfd,  //
                          size_t offset, size_t size) {
  char chunk[512];
  int usecfr = 1;
  if (offset && lseek(infd, offset, SEEK_SET) == -1) {
    SysDie(inpath, "lseek");
  }
  while (size) {
    ssize_t rc;
    if (usecfr) {
      size_t ask = size < 4194304 ? size : 4194304;
      rc = copy_file_range(infd, 0, outfd, 0, ask, 0);
      if (!rc) {
        Die(inpath, "unexpected eof");
      }
      if (rc == -1) {
        if (errno != EXDEV &&         // different partitions
            errno != EINVAL &&        // possible w/ ecryptfs
            errno != ENOSYS &&        // not linux or freebsd
            errno != ENOTSUP &&       // probably a /zip file
            errno != EOPNOTSUPP) {    // technically the same
          SysDie(inpath, "copy_file_range");
        }
        // nothing was transferred; retry this range the slow way
        usecfr = 0;
        continue;
      }
      size -= rc;
    } else {
      size_t ask = size < sizeof(chunk) ? size : sizeof(chunk);
      rc = read(infd, chunk, ask);
      if (!rc) {
        Die(inpath, "unexpected eof");
      }
      if (rc == -1) {
        SysDie(inpath, "read");
      }
      ssize_t put = write(outfd, chunk, rc);
      if (put == -1) {
        SysDie(outpath, "write");
      }
      if (put != rc) {
        Die(outpath, "posix violated");
      }
      size -= put;
    }
  }
}
static void AppendName(const char *name, struct Args *names,
struct Bytes *filenames) {
struct ar_hdr header1;
char bnbuf[PATH_MAX + 1];
strlcpy(bnbuf, name, sizeof(bnbuf));
char *aname = StrCat(basename(bnbuf), "/");
if (strlen(aname) <= sizeof(header1.ar_name)) {
AppendArg(names, aname);
} else {
char ibuf[21];
FormatUint64(ibuf, filenames->i);
AppendArg(names, StrCat("/", ibuf));
AppendBytes(filenames, aname, strlen(aname));
AppendBytes(filenames, "\n", 1);
}
}
// collects the symbols that one elf object contributes to the index
// - dies if the image isn't elf64, or if its .strtab or symbol table
//   can't be located, or if a symbol name can't be resolved
// - iteration starts at the symbol table section's sh_info index
// - unnamed, undefined (SHN_UNDEF), and common (SHN_COMMON) symbols
//   are skipped
// - for each remaining symbol its nul-terminated name is appended to
//   𝑠𝑦𝑚𝑏𝑜𝑙𝑠 and 𝑜𝑏𝑗𝑖𝑑 is appended to 𝑠𝑦𝑚𝑛𝑎𝑚𝑒𝑠
static void AppendSymbols(const char *path, const Elf64_Ehdr *elf,
                          size_t elfsize, struct Bytes *symbols,
                          struct Ints *symnames, int objid) {
  if (!IsElf64Binary(elf, elfsize)) {
    Die(path, "not an elf64 binary");
  }
  char *strtab = GetElfStringTable(elf, elfsize, ".strtab");
  if (!strtab) {
    Die(path, "elf .strtab not found");
  }
  Elf64_Xword total;
  Elf64_Shdr *section = GetElfSymbolTable(elf, elfsize, SHT_SYMTAB, &total);
  Elf64_Sym *table = GetElfSectionAddress(elf, elfsize, section);
  if (!table) {
    Die(path, "elf symbol table not found");
  }
  for (Elf64_Xword k = section->sh_info; k < total; ++k) {
    const Elf64_Sym *sym = &table[k];
    if (!sym->st_name ||                //
        sym->st_shndx == SHN_UNDEF ||   //
        sym->st_shndx == SHN_COMMON) {
      continue;
    }
    const char *label = GetElfString(elf, elfsize, strtab, sym->st_name);
    if (!label) {
      Die(path, "elf symbol name corrupted");
    }
    AppendBytes(symbols, label, strlen(label) + 1);
    AppendInt(symnames, objid);
  }
}
// program entry point: builds a new static archive with a symbol index
//
// usage is `ar FLAGS ARCHIVE FILE...`. the work happens in three steps:
//
// 1. analysis pass: each input is visited once to record its path,
//    size, offset, member name, and the symbols it defines
// 2. layout: the size of the archive preamble (magic, symbol table,
//    long filename table) and the offset of each member is computed
// 3. output: the preamble goes out in a single writev(), followed by
//    each member's header and content
//
// any failure prints a message and exits nonzero via Die() / SysDie()
//
// @return 0 on success
int main(int argc, char *argv[]) {
  int fd, objectid;
  struct ar_hdr header1;
  struct ar_hdr header2;

#ifdef MODE_DBG
  ShowCrashReports();
#endif

  // handle hardcoded flags
  if (argc == 2) {
    if (IsEqual(argv[1], "-n"))
      exit(0);
    if (IsEqual(argv[1], "-h") ||  //
        IsEqual(argv[1], "-?") ||  //
        IsEqual(argv[1], "--help")) {
      ShowUsage(0, 1);
    }
    if (IsEqual(argv[1], "--version")) {
      tinyprint(1,
                "cosmopolitan ar v3.0\n"
                "copyright 2024 justine tunney\n"
                "https://github.com/jart/cosmopolitan\n",
                NULL);
      exit(0);
    }
  }

  // get flags and output path
  if (argc < 3)
    Die(argv[0], "missing argument");
  char *flags = argv[1];
  const char *outpath = argv[2];

  // we only support one mode of operation, which is creating a new
  // deterministic archive. computing the full archive goes so fast
  // on modern systems that it isn't worth supporting the byzantine
  // standard posix ar flags intended to improve cassette tape perf
  //
  // the flag letters are sorted in place so they may be given in any
  // order; a leading 'D' after sorting is then skipped
  SortChars(flags, strlen(flags));
  if (*flags == 'D')
    ++flags;
  if (!IsEqual(flags, "cr") &&    //
      !IsEqual(flags, "cru") &&   //
      !IsEqual(flags, "crsu") &&  //
      !IsEqual(flags, "crs")) {
    tinyprint(2, program_invocation_name, ": flags should be rcsD\n", NULL);
    ShowUsage(1, 2);
  }

  // parallel lists having one entry per archive member, plus the
  // symbol index and long filename tables
  struct Args args = {reballoc(0, 4096, sizeof(char *))};
  struct Args names = {reballoc(0, 4096, sizeof(char *))};
  struct Ints sizes = {reballoc(0, 4096, sizeof(int))};
  struct Ints foffsets = {reballoc(0, 4096, sizeof(int))};
  struct Ints symnames = {reballoc(0, 16384, sizeof(int))};
  struct Bytes symbols = {reballoc(0, 131072, sizeof(char))};
  struct Bytes filenames = {reballoc(0, 16384, sizeof(char))};

  // perform analysis pass on input files
  struct GetArgs ga;
  getargs_init(&ga, argv + 3);
  for (objectid = 0;;) {
    struct stat st;
    const char *arg;
    if (!(arg = getargs_next(&ga)))
      break;
    if (endswith(arg, "/"))
      continue;
    if (endswith(arg, ".pkg"))
      continue;
    if (endswith(arg, ".a")) {
      // merge members of an existing archive; every member records the
      // archive's own path along with its offset and size within it
      struct Ar ar;
      struct ArFile arf;
      openar(&ar, arg);
      while (readar(&ar, &arf)) {
        AppendArg(&args, StrDup(arg));
        AppendInt(&sizes, arf.size);
        AppendInt(&foffsets, arf.offset);
        AppendName(arf.name, &names, &filenames);
        AppendSymbols(arg, arf.data, arf.size, &symbols, &symnames, objectid++);
      }
      closear(&ar);
    } else {
      if (stat(arg, &st))
        SysDie(arg, "stat");
      if (S_ISDIR(st.st_mode))
        continue;
      if (!st.st_size)
        Die(arg, "file is empty");
      if (st.st_size > 0x7ffff000)
        Die(arg, "file too large");
      if ((fd = open(arg, O_RDONLY)) == -1)
        SysDie(arg, "open");
      AppendArg(&args, StrDup(arg));
      AppendInt(&sizes, st.st_size);
      AppendInt(&foffsets, 0);
      AppendName(arg, &names, &filenames);
      // object is only mapped long enough to harvest its symbols
      void *elf = mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
      if (elf == MAP_FAILED)
        SysDie(arg, "mmap");
      AppendSymbols(arg, elf, st.st_size, &symbols, &symnames, objectid++);
      if (munmap(elf, st.st_size))
        SysDie(arg, "munmap");
      if (close(fd))
        SysDie(arg, "close");
    }
  }
  getargs_destroy(&ga);

  // compute length of output archive
  size_t outsize = 0;
  struct iovec iov[8];
  int tablebufsize = 4 + symnames.i * 4;
  char *tablebuf = balloc(tablebufsize, 1);
  int *offsets = balloc(args.i * sizeof(int), sizeof(int));
  iov[0].iov_base = ARMAG;
  outsize += (iov[0].iov_len = SARMAG);
  iov[1].iov_base = &header1;
  outsize += (iov[1].iov_len = sizeof(struct ar_hdr));
  iov[2].iov_base = tablebuf;
  outsize += (iov[2].iov_len = tablebufsize);
  iov[3].iov_base = symbols.p;
  outsize += (iov[3].iov_len = symbols.i);
  iov[4].iov_base = "";  // one pad byte if needed to reach even offset
  outsize += (iov[4].iov_len = outsize & 1);
  iov[5].iov_base = &header2;
  outsize += (iov[5].iov_len = filenames.i ? sizeof(struct ar_hdr) : 0);
  iov[6].iov_base = filenames.p;
  outsize += (iov[6].iov_len = filenames.i);
  iov[7].iov_base = "\n";  // one pad byte if filename table is odd sized
  outsize += (iov[7].iov_len = filenames.i & 1);
  for (size_t i = 0; i < args.i; ++i) {
    // members always begin on even offsets
    outsize += outsize & 1;
    if (outsize > INT_MAX) {
      Die(outpath, "archive too large");
    }
    offsets[i] = outsize;
    outsize += sizeof(struct ar_hdr);
    outsize += sizes.p[i];
  }

  // serialize metadata
  MakeArHeader(&header1, "/", 0, tablebufsize + ROUNDUP(symbols.i, 2));
  MakeArHeader(&header2, "//", 0, ROUNDUP(filenames.i, 2));
  WRITE32BE(tablebuf, symnames.i);
  for (size_t i = 0; i < symnames.i; ++i)
    WRITE32BE(tablebuf + 4 + i * 4, offsets[symnames.p[i]]);

  // write output archive
  int outfd;
  if ((outfd = creat(outpath, 0644)) == -1)
    SysDie(outpath, "creat");
  if (ftruncate(outfd, outsize))
    SysDie(outpath, "ftruncate");
  // from here on outsize tracks how many bytes have been written
  if ((outsize = writev(outfd, iov, ARRAYLEN(iov))) == -1)
    SysDie(outpath, "writev[1]");
  for (size_t i = 0; i < args.i; ++i) {
    const char *inpath = args.p[i];
    // consecutive members sharing one input path (i.e. members merged
    // from the same .a file) reuse a single open file descriptor
    if (!(i && IsEqual(inpath, args.p[i - 1])))
      if ((fd = open(inpath, O_RDONLY)) == -1)
        SysDie(inpath, "open");
    iov[0].iov_base = "\n";
    outsize += (iov[0].iov_len = outsize & 1);
    iov[1].iov_base = &header1;
    outsize += (iov[1].iov_len = sizeof(struct ar_hdr));
    MakeArHeader(&header1, names.p[i], 0100644, sizes.p[i]);
    if (writev(outfd, iov, 2) == -1)
      SysDie(outpath, "writev[2]");
    outsize += sizes.p[i];
    CopyFileOrDie(inpath, fd, outpath, outfd, foffsets.p[i], sizes.p[i]);
    if (!(i + 1 < args.i && IsEqual(inpath, args.p[i + 1])))
      if (close(fd))
        SysDie(inpath, "close");
  }
  if (close(outfd))
    SysDie(outpath, "close");

  return 0;
}