cosmopolitan/tool/build/lib/getargs.c
2024-08-04 12:52:25 -07:00

187 lines
6.3 KiB
C

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2021 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "tool/build/lib/getargs.h"
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/errno.h"
#include "libc/fmt/magnumstrs.internal.h"
#include "libc/macros.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/sysconf.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/errfuns.h"
/**
* @fileoverview Fast Command Line Argument Ingestion.
*
* The purpose of this library is to be able to have build commands with
* huge argument lists. The way we do that is by replacing commands like
*
* foo lots of args
*
* with this
*
* echo of args >args
* foo lots @args
*
* This iterator abstracts the process of reading the special `@`
* prefixed args. In order to do that quickly and easily, we make the
* following assumptions:
*
* 1. Arguments don't have whitespace.
* 2. Files have a trailing whitespace.
*
* We need (1) so GNU Make can go faster. Assume we tokenized based on
* newlines. We would write that in our Makefile as follows:
*
* # don't do this
* target: thousands of args
* $(file >$@.args) $(foreach x,$^,$(file >>$@.args,$(x)))
* tool -o $@ @$@.args
*
* That is slow because it needs to open and close the args file
* thousands of times. If we trade away filenames with spaces then the
* following will only require a couple system calls:
*
* # do this
* target: thousands of args
* $(file >$@.args,$^)
* tool -o $@ @$@.args
*
* We need (2) because it makes the code in this file simpler and avoids
* a malloc() dependency. Having that trailing character means argument
* parsing from files can be a zero-copy operation.
*/
// Treats every byte at or below ' ' (NUL and control characters
// included) as a delimiter. The mask makes a negative `char` compare
// as its unsigned byte value, so bytes >= 0x80 are never delimiters.
#define IsSpace(c) (((c) & 0xff) <= ' ')
/**
 * Reports a failed operation on an args file and terminates.
 *
 * Writes "PATH: REASON: ERRNO-DESCRIPTION" to file descriptor 2, then
 * exits with status 1.
 *
 * @param path is the file the failed operation was acting on
 * @param reason names the operation that failed, e.g. "open"
 */
static wontreturn void getargs_fail(const char *path, const char *reason) {
  const char *description = _strerdoc(errno);
  if (!description) {
    description = "Unknown error";
  }
  tinyprint(2, path, ": ", reason, ": ", description, "\n", NULL);
  exit(1);
}
/**
 * Resets GetArgs object to all zeroes, then points it at `args`.
 *
 * @param ga is the iterator object to initialize
 * @param args is a NULL-terminated argument vector; it must not be
 *     NULL, and is borrowed for the lifetime of the GetArgs object
 */
void getargs_init(struct GetArgs *ga, char **args) {
  assert(args);
  memset(ga, 0, sizeof *ga);
  ga->args = args;
}
/**
 * Unmaps any args file still held by GetArgs object and zeroes it.
 *
 * @param ga is the iterator object to tear down
 */
void getargs_destroy(struct GetArgs *ga) {
  // only an args file that hasn't been fully consumed is still mapped
  if (ga->map && munmap(ga->map, ga->mapsize)) {
    notpossible;
  }
  memset(ga, 0, sizeof *ga);
}
/**
 * Returns next argument, expanding `@file` arguments along the way.
 *
 * Typical usage:
 *
 *     const char *s;
 *     while ((s = getargs_next(&ga))) {
 *       printf("%s\n", s);
 *     }
 *
 * Arguments that don't start with `@` are handed back untouched. An
 * argument of the form `@path` names a file, which is privately mapped
 * into memory and split into tokens on bytes <= ' '. Each token gets
 * NUL-terminated by overwriting the delimiter that follows it, which
 * means a final token without a trailing delimiter is not returned.
 * Empty files yield no tokens. If the file can't be opened, measured,
 * or mapped, then a message is printed and the process exits.
 *
 * @return NUL-terminated string, or NULL once every argument has been
 *     consumed; it must not be freed, and should only be assumed to
 *     remain valid until the next getargs_next call
 */
const char *getargs_next(struct GetArgs *ga) {
  for (;;) {
    if (ga->map) {
      // step over delimiters that precede the next token
      while (ga->j < ga->mapsize && IsSpace(ga->map[ga->j])) {
        ++ga->j;
      }
      char *token = ga->map + ga->j;
      size_t remain = ga->mapsize - ga->j;
      size_t len = 0;
#if defined(__SSE2__) && defined(__GNUC__) && !defined(__STRICT_ANSI__)
      // scan sixteen bytes per iteration for the first delimiter
      typedef unsigned char xmm_t
          __attribute__((__vector_size__(16), __aligned__(1)));
      while (len + 16 <= remain) {
        // bit i of `delims` is set when byte i of the chunk is <= ' '
        unsigned delims =
            __builtin_ia32_pmovmskb128(
                *(const xmm_t *)(token + len) >
                (xmm_t){' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
                        ' ', ' ', ' ', ' ', ' ', ' '}) ^
            0xffff;
        if (delims) {
          len += __builtin_ctzl(delims);
          break;
        }
        len += 16;
      }
#endif
      // byte-at-a-time scan for whatever the wide scan didn't cover
      while (len < remain && !IsSpace(token[len])) {
        ++len;
      }
      if (len && len < remain) {
        // the delimiter becomes the terminator, so nothing is copied
        token[len] = 0;
        ga->j += len + 1;
        return token;
      }
      // file is exhausted; release it and resume with the arg vector
      if (munmap(ga->map, ga->mapsize)) {
        notpossible;
      }
      ga->map = NULL;
      ga->mapsize = 0;
      ga->j = 0;
    }
    char *arg = ga->args[ga->i];
    if (!arg) {
      return NULL;
    }
    ++ga->i;
    if (*arg != '@') {
      return arg;
    }
    // everything after the `@` is the path of an args file
    ga->path = arg + 1;
    int fd = open(ga->path, O_RDONLY);
    if (fd == -1) {
      getargs_fail(ga->path, "open");
    }
    ssize_t filesize = lseek(fd, 0, SEEK_END);
    if (filesize == -1) {
      getargs_fail(ga->path, "lseek");
    }
    if (filesize) {
      // private writable mapping lets us poke NULs without touching
      // the file itself
      char *mapping =
          mmap(0, filesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
      if (mapping == MAP_FAILED) {
        getargs_fail(ga->path, "mmap");
      }
      ga->map = mapping;
      ga->mapsize = filesize;
    }
    close(fd);
  }
}