cosmopolitan/tool/args/args2.c
Justine Tunney 4e9566cd33
Invent new cosmo_args() api
This function offers a more powerful replacement for LoadZipArgs() which
is now deprecated. By writing your C programs as follows:

    int main(int argc, char *argv[]) {
      argc = cosmo_args("/zip/.args", &argv);
      // ...
    }

You'll be able to embed a config file inside your binaries that augments
their behavior by specifying default arguments. The way you should use
it on llamafile would be something like this:

    # specify model
    -m Qwen2.5-Coder-34B-Instruct.Q6_K.gguf

    # prevent settings below from being changed
    ...

    # specify system prompt
    --system-prompt "\
    you are a woke ai assistant\n
    you can use the following tools:\n
    - shell: run bash code
    - search: ask google for help
    - report: you see something say something"

    # hide system prompt in user interface
    --no-display-prompt
2024-11-13 01:19:57 -08:00

582 lines
16 KiB
C

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2024 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/calls/calls.h"
#include "libc/cosmo.h"
#include "libc/ctype.h"
#include "libc/cxxabi.h"
#include "libc/errno.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/str/tab.h"
#include "libc/sysv/consts/o.h"
__static_yoink("zipos");
// Growable buffer helpers. A buffer named `b` is a trio of local
// variables `b_ptr`, `b_len` and `b_cap`. Every macro jumps to a label
// named `Failure`, which the enclosing function must provide, when an
// allocation fails.

// Points `b` at a fresh zeroed allocation with room for eight elements.
// Whatever `b_ptr` referenced before is NOT freed; the caller must have
// released it or handed it off already.
#define CLEAR(b)                                        \
  do {                                                  \
    b##_cap = 8;                                        \
    b##_len = 0;                                        \
    if (!(b##_ptr = calloc(b##_cap, sizeof(*b##_ptr)))) \
      goto Failure;                                     \
  } while (0)

// Makes sure `b` has room for one more element plus the zero terminator,
// growing capacity by half when it doesn't. On failure `b_ptr` still
// refers to the old, valid allocation.
#define RESERVE(b)                                             \
  do {                                                         \
    if (b##_len + 2 > b##_cap) {                               \
      b##_cap += b##_cap >> 1;                                 \
      void *p_ = realloc(b##_ptr, b##_cap * sizeof(*b##_ptr)); \
      if (!p_)                                                 \
        goto Failure;                                          \
      b##_ptr = p_;                                            \
    }                                                          \
  } while (0)

// Appends element `c` to `b` and keeps the buffer zero terminated.
// `c` is evaluated exactly once.
#define APPEND(b, c)          \
  do {                        \
    RESERVE(b);               \
    b##_ptr[b##_len++] = (c); \
    b##_ptr[b##_len] = 0;     \
  } while (0)

// Appends a heap copy of C string `s` to the pointer buffer `b`. Room is
// reserved before duplicating, so nothing is leaked if either allocation
// fails. `s` is evaluated exactly once.
#define APPEND_DUP(b, s)       \
  do {                         \
    char *dup_;                \
    RESERVE(b);                \
    if (!(dup_ = strdup(s)))   \
      goto Failure;            \
    b##_ptr[b##_len++] = dup_; \
    b##_ptr[b##_len] = 0;      \
  } while (0)
/**
 * Translates the letter of a backslash escape into its character code.
 *
 * @param c is the character that followed the backslash
 * @return code for `a`, `b`, `t`, `n`, `v`, `f`, `r` or `e`, otherwise
 *     -1 if `c` is not one of those letters
 */
static int esc(int c) {
  static const struct {
    int letter;
    int code;
  } kEscapes[] = {
      {'a', '\a'},  //
      {'b', '\b'},  //
      {'t', '\t'},  //
      {'n', '\n'},  //
      {'v', '\v'},  //
      {'f', '\f'},  //
      {'r', '\r'},  //
      {'e', '\e'},  //
  };
  for (unsigned i = 0; i < sizeof(kEscapes) / sizeof(kEscapes[0]); ++i) {
    if (kEscapes[i].letter == c)
      return kEscapes[i].code;
  }
  return -1;
}
/**
 * Frees a null-terminated vector of heap strings and the vector itself.
 *
 * Takes a `void *` so it can be registered as a __cxa_atexit() callback.
 * Passing null is a no-op, like free(), so error paths may call this
 * before the vector has been allocated.
 *
 * @param list is a `char **` whose strings and array came from malloc
 */
static void cosmo_args_free(void *list) {
  if (!list)
    return;
  char **args = list;
  for (char **p = args; *p; ++p)
    free(*p);
  free(args);
}
/**
 * Replaces argument list with `/zip/.args` contents if it exists.
 *
 * First read the documentation to LoadZipArgs(). This works basically
 * the same, assuming you pass `"/zip/.args"` as the first argument. The
 * difference is that arguments here are parsed more similarly to the
 * shell. In the old version, if you wanted your zip .args config to
 * insert three arguments at the beginning of your argv, you'd say:
 *
 *     arg1
 *     arg2
 *     arg3
 *
 * This will still work. You can also now say:
 *
 *     arg1 arg2
 *     arg3
 *
 * This breaks backwards compatibility, since the old design was made
 * for programs like ar.ape that wanted to be able to accept filename
 * arguments that could potentially have spaces. This new parser, on the
 * other hand, is designed to help offer the configurability a project
 * like llamafile needs, without going so far as to be Turing complete.
 * For example, you could say:
 *
 *     # this is a comment
 *     this\ is' a single arg'"ok"# # comment
 *
 * Which will result in the C string `"this is a single argok#"`. You
 * can even use $VAR notation to schlep in environment variables. Here's
 * how this is different from shell:
 *
 * 1. We don't expand $foo into multiple arguments if it has spaces
 * 2. Double quoted strings work like they do in C, e.g. `"\177\x7f\n"`
 * 3. You can't recursively reference environment variables
 *
 * A `...` that begins an argument and is followed by whitespace or the
 * end of the file is replaced by the remaining original arguments. If
 * the file never says `...`, or no file was loaded, the remaining
 * original arguments are appended after the ones from the file.
 *
 * If the process was started in a degenerate state without argv[0] then
 * GetProgramExecutableName() will be inserted in its place, on success.
 *
 * The `path` argument may be null, in which case only normalization is
 * performed. It is not considered an error if `path` is specified and
 * the file doesn't exist. The errno state will be left dirty if that
 * happens, so it can be checked by clearing `errno` before calling.
 *
 * The returned memory is copied and automatically freed on exit().
 *
 * On failure everything allocated by this call is released, the file
 * is closed, `*argv` is left untouched, and errno still describes the
 * error that caused the failure.
 *
 * @param path is the file holding default arguments, or null
 * @param argv points to the null-terminated argument vector, which is
 *     replaced by the newly built vector on success
 * @return argc on success, or -1 w/ errno
 */
int cosmo_args(const char *path, char ***argv) {

  // the file
  int fd = -1;

  // errno of the error that sent us to Failure
  int saved_errno;

  // variable name builder
  int var_cap = 0;
  int var_len = 0;
  char *var_ptr = 0;

  // argument string builder
  int arg_cap = 0;
  int arg_len = 0;
  char *arg_ptr = 0;

  // argument array builder
  int args_cap = 0;
  int args_len = 0;
  char **args_ptr = 0;

  // initialize memory
  CLEAR(var);
  CLEAR(arg);
  CLEAR(args);

  // state machine
  enum {
    NORMAL,                        // between arguments
    COMMENT,                       // after `#`, until end of line
    ARGUMENT,                      // inside an unquoted argument
    BACKSLASH,                     // after `\` outside of quotes
    DOLLAR,                        // after `$`
    DOLLAR_VAR,                    // reading the NAME of `$NAME`
    DOLLAR_LCB,                    // reading the NAME of `${NAME}`
    DOT,                           // after `.` at start of argument
    DOT_DOT,                       // after `..` at start of argument
    DOT_DOT_DOT,                   // after `...` at start of argument
    QUOTE,                         // inside '...'
    DQUOTE,                        // inside "..."
    DQUOTE_DOLLAR,                 // after `$` inside "..."
    DQUOTE_DOLLAR_VAR,             // reading `$NAME` inside "..."
    DQUOTE_DOLLAR_LCB,             // reading `${NAME}` inside "..."
    DQUOTE_BACKSLASH,              // after `\` inside "..."
    DQUOTE_BACKSLASH_X,            // after `\x`
    DQUOTE_BACKSLASH_X_XDIGIT,     // after `\x` and one hex digit
    DQUOTE_BACKSLASH_DIGIT,        // after `\` and one digit
    DQUOTE_BACKSLASH_DIGIT_DIGIT,  // after `\` and two digits
  } t = NORMAL;

  // extra state
  int x, numba = 0;

  // add program argument
  char **argvp = *argv;
  if (*argvp) {
    APPEND_DUP(args, *argvp++);
  } else {
    APPEND_DUP(args, GetProgramExecutableName());
  }

  // perform i/o
  if (path) {
    // a missing file isn't an error; anything else is
    if ((fd = open(path, O_RDONLY)) == -1)
      if (errno != ENOENT && errno != ENOTDIR)
        goto Failure;
    if (fd != -1) {
      for (;;) {
        char buf[512];
        int got = read(fd, buf, sizeof(buf));
        if (got == -1)
          goto Failure;
        if (!got)
          break;
        for (int i = 0; i < got; ++i) {
          // force byte into 0..255 so it can index tables like kHexToInt
          int c = buf[i] & 255;
          switch (t) {

            case NORMAL:
              switch (c) {
                case ' ':
                case '\t':
                case '\r':
                case '\n':
                case '\f':
                case '\v':
                  break;
                case '#':
                  t = COMMENT;
                  break;
                case '\'':
                  t = QUOTE;
                  break;
                case '"':
                  t = DQUOTE;
                  break;
                case '$':
                  t = DOLLAR;
                  break;
                case '.':
                  t = DOT;
                  break;
                case '\\':
                  t = BACKSLASH;
                  break;
                default:
                  APPEND(arg, c);
                  t = ARGUMENT;
                  break;
              }
              break;

            // other states jump here to have `c` reprocessed as part
            // of an unquoted argument
            Argument:
            case ARGUMENT:
              switch (c) {
                case ' ':
                case '\t':
                case '\r':
                case '\n':
                case '\f':
                case '\v':
                  // the array takes ownership of the finished string
                  APPEND(args, arg_ptr);
                  CLEAR(arg);
                  t = NORMAL;
                  break;
                case '\'':
                  t = QUOTE;
                  break;
                case '"':
                  t = DQUOTE;
                  break;
                case '$':
                  t = DOLLAR;
                  break;
                case '\\':
                  t = BACKSLASH;
                  break;
                default:
                  APPEND(arg, c);
                  break;
              }
              break;

            case BACKSLASH:
              if (c == '\r') {
                // do nothing
              } else if (c == '\n') {
                // backslash newline is a line continuation
                t = NORMAL;
              } else if ((x = esc(c)) != -1) {
                APPEND(arg, x);
                t = ARGUMENT;
              } else {
                APPEND(arg, c);
                t = ARGUMENT;
              }
              break;

            case COMMENT:
              if (c == '\n')
                t = NORMAL;
              break;

            case DOLLAR:
              if (isalnum(c) || c == '_') {
                APPEND(var, c);
                t = DOLLAR_VAR;
              } else if (c == '{') {
                t = DOLLAR_LCB;
              } else {
                // not a variable reference, so the `$` is literal
                APPEND(arg, '$');
                if (c != '$') {
                  t = ARGUMENT;
                  goto Argument;
                }
              }
              break;

            case DOLLAR_VAR:
              if (isalnum(c) || c == '_') {
                APPEND(var, c);
              } else {
                // name is complete; an unset variable expands to nothing
                char *val = getenv(var_ptr);
                if (!val)
                  val = "";
                free(var_ptr);
                CLEAR(var);
                while (*val)
                  APPEND(arg, *val++);
                t = ARGUMENT;
                goto Argument;
              }
              break;

            case DOLLAR_LCB:
              if (c == '}') {
                char *val = getenv(var_ptr);
                if (!val)
                  val = "";
                free(var_ptr);
                CLEAR(var);
                while (*val)
                  APPEND(arg, *val++);
                t = ARGUMENT;
              } else {
                APPEND(var, c);
              }
              break;

            case QUOTE:
              if (c == '\'') {
                t = ARGUMENT;
              } else {
                APPEND(arg, c);
              }
              break;

            // other states jump here to have `c` reprocessed as part
            // of a double quoted string
            Dquote:
            case DQUOTE:
              if (c == '"') {
                t = ARGUMENT;
              } else if (c == '$') {
                t = DQUOTE_DOLLAR;
              } else if (c == '\\') {
                t = DQUOTE_BACKSLASH;
              } else {
                APPEND(arg, c);
              }
              break;

            case DQUOTE_DOLLAR:
              if (isalnum(c) || c == '_') {
                APPEND(var, c);
                t = DQUOTE_DOLLAR_VAR;
              } else if (c == '{') {
                t = DQUOTE_DOLLAR_LCB;
              } else {
                APPEND(arg, '$');
                if (c != '$') {
                  t = DQUOTE;
                  goto Dquote;
                }
              }
              break;

            case DQUOTE_DOLLAR_VAR:
              if (isalnum(c) || c == '_') {
                APPEND(var, c);
              } else {
                char *val = getenv(var_ptr);
                if (!val)
                  val = "";
                free(var_ptr);
                CLEAR(var);
                while (*val)
                  APPEND(arg, *val++);
                t = DQUOTE;
                goto Dquote;
              }
              break;

            case DQUOTE_DOLLAR_LCB:
              if (c == '}') {
                char *val = getenv(var_ptr);
                if (!val)
                  val = "";
                free(var_ptr);
                CLEAR(var);
                while (*val)
                  APPEND(arg, *val++);
                t = DQUOTE;
              } else {
                APPEND(var, c);
              }
              break;

            case DQUOTE_BACKSLASH:
              if (isdigit(c)) {
                numba = c - '0';
                t = DQUOTE_BACKSLASH_DIGIT;
              } else if (c == 'x') {
                t = DQUOTE_BACKSLASH_X;
              } else if ((x = esc(c)) != -1) {
                APPEND(arg, x);
                t = DQUOTE;
              } else if (c == '\r') {
                // do nothing
              } else if (c == '\n') {
                // backslash newline is a line continuation
                t = DQUOTE;
              } else {
                APPEND(arg, c);
                t = DQUOTE;
              }
              break;

            case DQUOTE_BACKSLASH_DIGIT:
              if (isdigit(c)) {
                numba <<= 3;
                numba += c - '0';
                t = DQUOTE_BACKSLASH_DIGIT_DIGIT;
              } else {
                APPEND(arg, numba);
                t = DQUOTE;
                goto Dquote;
              }
              break;

            case DQUOTE_BACKSLASH_DIGIT_DIGIT:
              if (isdigit(c)) {
                numba <<= 3;
                numba += c - '0';
                APPEND(arg, numba);
                t = DQUOTE;
              } else {
                APPEND(arg, numba);
                t = DQUOTE;
                goto Dquote;
              }
              break;

            case DQUOTE_BACKSLASH_X:
              if ((x = kHexToInt[c]) != -1) {
                numba = x;
                t = DQUOTE_BACKSLASH_X_XDIGIT;
              } else {
                APPEND(arg, 'x');
                t = DQUOTE;
                goto Dquote;
              }
              break;

            case DQUOTE_BACKSLASH_X_XDIGIT:
              if ((x = kHexToInt[c]) != -1) {
                numba <<= 4;
                numba += x;
                APPEND(arg, numba);
                t = DQUOTE;
              } else {
                APPEND(arg, numba);
                t = DQUOTE;
                goto Dquote;
              }
              break;

            case DOT:
              if (c == '.') {
                t = DOT_DOT;
              } else {
                APPEND(arg, '.');
                t = ARGUMENT;
                goto Argument;
              }
              break;

            case DOT_DOT:
              if (c == '.') {
                t = DOT_DOT_DOT;
              } else {
                APPEND(arg, '.');
                APPEND(arg, '.');
                t = ARGUMENT;
                goto Argument;
              }
              break;

            case DOT_DOT_DOT:
              if (isspace(c)) {
                // `...` by itself splices in the remaining original args
                while (*argvp)
                  APPEND_DUP(args, *argvp++);
                t = NORMAL;
              } else {
                APPEND(arg, '.');
                APPEND(arg, '.');
                APPEND(arg, '.');
                t = ARGUMENT;
                goto Argument;
              }
              break;

            default:
              __builtin_unreachable();
          }
        }
      }

      // forget the descriptor before checking the result, so that the
      // failure path can't close the same descriptor a second time
      int rc = close(fd);
      fd = -1;
      if (rc)
        goto Failure;

      // clean up dirty state
      switch (t) {
        case DOT:
          APPEND(arg, '.');
          break;
        case DOT_DOT:
          APPEND(arg, '.');
          APPEND(arg, '.');
          break;
        case DOT_DOT_DOT:
          while (*argvp)
            APPEND_DUP(args, *argvp++);
          break;
        case DOLLAR:
          APPEND(arg, '$');
          break;
        case DOLLAR_VAR:
        case DQUOTE_DOLLAR_VAR: {
          // braces are needed because before C23 a declaration can't
          // directly follow a label
          const char *val = getenv(var_ptr);
          if (!val)
            val = "";
          while (*val)
            APPEND(arg, *val++);
          break;
        }
        case DOLLAR_LCB:
        case DQUOTE_DOLLAR_LCB:
          // unterminated `${NAME` is passed through literally
          APPEND(arg, '$');
          APPEND(arg, '{');
          for (int j = 0; var_ptr[j]; ++j)
            APPEND(arg, var_ptr[j]);
          break;
        default:
          break;
      }
      if (arg_len) {
        APPEND(args, arg_ptr);
        CLEAR(arg);
      }
    }
  }

  // append original argv if ... wasn't specified
  while (*argvp)
    APPEND_DUP(args, *argvp++);

  // return result
  __cxa_atexit(cosmo_args_free, args_ptr, 0);
  *argv = args_ptr;
  free(arg_ptr);
  free(var_ptr);
  return args_len;

Failure:
  // the cleanup calls below must not clobber the errno we report
  saved_errno = errno;
  // args_ptr is still null if the very first allocations failed
  if (args_ptr)
    cosmo_args_free(args_ptr);
  free(arg_ptr);
  free(var_ptr);
  if (fd != -1)
    close(fd);
  errno = saved_errno;
  return -1;
}