mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 03:27:39 +00:00
4e9566cd33
This function offers a more powerful replacement for LoadZipArgs() which is now deprecated. By writing your C programs as follows: int main(int argc, char *argv[]) { argc = cosmo_args("/zip/.args", &argv); // ... } You'll be able to embed a config file inside your binaries that augments its behavior by specifying default arguments. The way you should use it on llamafile would be something like this: # specify model -m Qwen2.5-Coder-34B-Instruct.Q6_K.gguf # prevent settings below from being changed ... # specify system prompt --system-prompt "\ you are a woke ai assistant\n you can use the following tools:\n - shell: run bash code - search: ask google for help - report: you see something say something" # hide system prompt in user interface --no-display-prompt
582 lines
16 KiB
C
582 lines
16 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2024 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
│ │
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/calls/calls.h"
|
|
#include "libc/cosmo.h"
|
|
#include "libc/ctype.h"
|
|
#include "libc/cxxabi.h"
|
|
#include "libc/errno.h"
|
|
#include "libc/mem/mem.h"
|
|
#include "libc/runtime/runtime.h"
|
|
#include "libc/str/tab.h"
|
|
#include "libc/sysv/consts/o.h"
|
|
|
|
__static_yoink("zipos");
|
|
|
|
/*
 * Resets builder `b` to an empty, zero-filled buffer of eight elements.
 *
 * Expects the variables `b_cap`, `b_len` and `b_ptr` plus a `Failure`
 * label to be in scope. The previous value of `b_ptr` is overwritten
 * without being freed, so the caller must have released it or handed
 * it off beforehand. Jumps to `Failure` if the allocation fails.
 */
#define CLEAR(b)                                   \
  do {                                             \
    b##_len = 0;                                   \
    b##_cap = 8;                                   \
    b##_ptr = calloc(b##_cap, sizeof(*b##_ptr));   \
    if (!b##_ptr)                                  \
      goto Failure;                                \
  } while (0)
|
|
|
|
/*
 * Appends element `c` to builder `b`, keeping a zero terminator after
 * the last element.
 *
 * Expects the variables `b_cap`, `b_len` and `b_ptr` plus a `Failure`
 * label to be in scope. The buffer grows by half its capacity, but
 * never by less than what is needed to hold the new element and the
 * terminator, so a builder whose capacity is still zero also works.
 * The new capacity is only committed once realloc() has succeeded;
 * on allocation failure the builder is left untouched and control
 * jumps to `Failure`. `c` is evaluated exactly once.
 */
#define APPEND(b, c)                                               \
  do {                                                             \
    if (b##_len + 2 > b##_cap) {                                   \
      int cap_ = b##_cap + (b##_cap >> 1);                         \
      if (cap_ < b##_len + 2)                                      \
        cap_ = b##_len + 2;                                        \
      void *p_ = realloc(b##_ptr, cap_ * sizeof(*b##_ptr));        \
      if (!p_)                                                     \
        goto Failure;                                              \
      b##_ptr = p_;                                                \
      b##_cap = cap_;                                              \
    }                                                              \
    b##_ptr[b##_len++] = (c);                                      \
    b##_ptr[b##_len] = 0;                                          \
  } while (0)
|
|
|
|
/*
 * Appends a heap-allocated copy of C string `s` to pointer builder `b`.
 *
 * The slot is reserved first (as a null entry) and the string is
 * duplicated afterwards, so a failed buffer growth can no longer leak
 * the copy. If strdup() fails the reserved slot is given back, leaving
 * the list null-terminated, and control jumps to `Failure`. `s` is
 * evaluated exactly once.
 */
#define APPEND_DUP(b, s)                           \
  do {                                             \
    APPEND(b, 0);                                  \
    if (!(b##_ptr[b##_len - 1] = strdup(s))) {     \
      --b##_len;                                   \
      goto Failure;                                \
    }                                              \
  } while (0)
|
|
|
|
/**
 * Translates the letter following a backslash into its control code.
 *
 * Recognized letters are `a b t n v f r e`, which map to BEL, BS, HT,
 * LF, VT, FF, CR and ESC respectively.
 *
 * @param c is the character that came after the backslash
 * @return control character value, or -1 if `c` isn't a known escape
 */
static int esc(int c) {
  static const char kLetters[] = "abtnvfre";
  static const char kCodes[] = "\a\b\t\n\v\f\r\033";
  for (int i = 0; kLetters[i]; ++i) {
    if (c == kLetters[i])
      return kCodes[i];
  }
  return -1;
}
|
|
|
|
/**
 * Frees a null-terminated array of heap strings and the array itself.
 *
 * Takes `void *` so it can be registered as an exit handler. A null
 * `list` is accepted and ignored, which lets error paths call this
 * before the array has been allocated.
 *
 * @param list is a `char **` whose last element is a null pointer
 */
static void cosmo_args_free(void *list) {
  char **args = list;
  if (!args)
    return;
  for (char *arg; (arg = *args); ++args)
    free(arg);
  free(list);
}
|
|
|
|
/**
|
|
* Replaces argument list with `/zip/.args` contents if it exists.
|
|
*
|
|
* First read the documentation to LoadZipArgs(). This works basically
|
|
* the same, assuming you pass `"/zip/.args"` as the first argument. The
|
|
* difference is that arguments here are parsed more similarly to the
|
|
* shell. In the old version, if you wanted your zip .args config to
|
|
* insert three arguments at the beginning of your argv, you'd say:
|
|
*
|
|
* arg1
|
|
* arg2
|
|
* arg3
|
|
*
|
|
* This will still work. You can also now say:
|
|
*
|
|
* arg1 arg2
|
|
* arg3
|
|
*
|
|
* This breaks backwards compatibility, since the old design was made
|
|
* for programs like ar.ape that wanted to be able to accept filename
|
|
* arguments that could potentially have spaces. This new parser, on the
|
|
* other hand, is designed to help offer the configurability a project
|
|
* like llamafile needs, without going so far as to be Turing complete.
|
|
* For example, you could say:
|
|
*
|
|
* # this is a comment
|
|
* this\ is' a single arg'"ok"# # comment
|
|
*
|
|
* Which will result in the C string `"this is a single argok#"`. You
|
|
* can even use $VAR notation to schlep in environment variables. Here's
|
|
* how this is different from shell:
|
|
*
|
|
* 1. We don't expand $foo into multiple arguments if it has spaces
|
|
* 2. Double quoted strings work like they do in C, e.g. `"\177\x7f\n"`
|
|
* 3. You can't recursively reference environment variables
|
|
*
|
|
* If the process was started in a degenerate state without argv[0] then
|
|
* GetProgramExecutableName() will be inserted in its place, on success.
|
|
*
|
|
* The `path` argument may be null, in which case only normalization is
|
|
* performed. It is not considered an error if `path` is specified and
|
|
* the file doesn't exist. The errno state will be left dirty if that
|
|
* happens, so it can be checked by clearing `errno` before calling.
|
|
*
|
|
* The returned memory is copied and automatically freed on exit().
|
|
*
|
|
* @return argc on success, or -1 w/ errno
|
|
*/
|
|
int cosmo_args(const char *path, char ***argv) {
|
|
|
|
// the file
|
|
int fd = -1;
|
|
|
|
// variable name builder
|
|
int var_cap = 0;
|
|
int var_len = 0;
|
|
char *var_ptr = 0;
|
|
|
|
// argument string builder
|
|
int arg_cap = 0;
|
|
int arg_len = 0;
|
|
char *arg_ptr = 0;
|
|
|
|
// argument array builder
|
|
int args_cap = 0;
|
|
int args_len = 0;
|
|
char **args_ptr = 0;
|
|
|
|
// initialize memory
|
|
CLEAR(var);
|
|
CLEAR(arg);
|
|
CLEAR(args);
|
|
|
|
// state machine
|
|
enum {
|
|
NORMAL,
|
|
COMMENT,
|
|
ARGUMENT,
|
|
BACKSLASH,
|
|
DOLLAR,
|
|
DOLLAR_VAR,
|
|
DOLLAR_LCB,
|
|
DOT,
|
|
DOT_DOT,
|
|
DOT_DOT_DOT,
|
|
QUOTE,
|
|
DQUOTE,
|
|
DQUOTE_DOLLAR,
|
|
DQUOTE_DOLLAR_VAR,
|
|
DQUOTE_DOLLAR_LCB,
|
|
DQUOTE_BACKSLASH,
|
|
DQUOTE_BACKSLASH_X,
|
|
DQUOTE_BACKSLASH_X_XDIGIT,
|
|
DQUOTE_BACKSLASH_DIGIT,
|
|
DQUOTE_BACKSLASH_DIGIT_DIGIT,
|
|
} t = NORMAL;
|
|
|
|
// extra state
|
|
int x, numba = 0;
|
|
|
|
// add program argument
|
|
char **argvp = *argv;
|
|
if (*argvp) {
|
|
APPEND_DUP(args, *argvp++);
|
|
} else {
|
|
APPEND_DUP(args, GetProgramExecutableName());
|
|
}
|
|
|
|
// perform i/o
|
|
if (path) {
|
|
if ((fd = open(path, O_RDONLY)) == -1)
|
|
if (errno != ENOENT && errno != ENOTDIR)
|
|
goto Failure;
|
|
if (fd != -1) {
|
|
for (;;) {
|
|
char buf[512];
|
|
int got = read(fd, buf, sizeof(buf));
|
|
if (got == -1)
|
|
goto Failure;
|
|
if (!got)
|
|
break;
|
|
for (int i = 0; i < got; ++i) {
|
|
int c = buf[i] & 255;
|
|
switch (t) {
|
|
|
|
case NORMAL:
|
|
switch (c) {
|
|
case ' ':
|
|
case '\t':
|
|
case '\r':
|
|
case '\n':
|
|
case '\f':
|
|
case '\v':
|
|
break;
|
|
case '#':
|
|
t = COMMENT;
|
|
break;
|
|
case '\'':
|
|
t = QUOTE;
|
|
break;
|
|
case '"':
|
|
t = DQUOTE;
|
|
break;
|
|
case '$':
|
|
t = DOLLAR;
|
|
break;
|
|
case '.':
|
|
t = DOT;
|
|
break;
|
|
case '\\':
|
|
t = BACKSLASH;
|
|
break;
|
|
default:
|
|
APPEND(arg, c);
|
|
t = ARGUMENT;
|
|
break;
|
|
}
|
|
break;
|
|
|
|
Argument:
|
|
case ARGUMENT:
|
|
switch (c) {
|
|
case ' ':
|
|
case '\t':
|
|
case '\r':
|
|
case '\n':
|
|
case '\f':
|
|
case '\v':
|
|
APPEND(args, arg_ptr);
|
|
CLEAR(arg);
|
|
t = NORMAL;
|
|
break;
|
|
case '\'':
|
|
t = QUOTE;
|
|
break;
|
|
case '"':
|
|
t = DQUOTE;
|
|
break;
|
|
case '$':
|
|
t = DOLLAR;
|
|
break;
|
|
case '\\':
|
|
t = BACKSLASH;
|
|
break;
|
|
default:
|
|
APPEND(arg, c);
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case BACKSLASH:
|
|
if (c == '\r') {
|
|
// do nothing
|
|
} else if (c == '\n') {
|
|
t = NORMAL;
|
|
} else if ((x = esc(c)) != -1) {
|
|
APPEND(arg, x);
|
|
t = ARGUMENT;
|
|
} else {
|
|
APPEND(arg, c);
|
|
t = ARGUMENT;
|
|
}
|
|
break;
|
|
|
|
case COMMENT:
|
|
if (c == '\n')
|
|
t = NORMAL;
|
|
break;
|
|
|
|
case DOLLAR:
|
|
if (isalnum(c) || c == '_') {
|
|
APPEND(var, c);
|
|
t = DOLLAR_VAR;
|
|
} else if (c == '{') {
|
|
t = DOLLAR_LCB;
|
|
} else {
|
|
APPEND(arg, '$');
|
|
if (c != '$') {
|
|
t = ARGUMENT;
|
|
goto Argument;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case DOLLAR_VAR:
|
|
if (isalnum(c) || c == '_') {
|
|
APPEND(var, c);
|
|
} else {
|
|
char *val = getenv(var_ptr);
|
|
if (!val)
|
|
val = "";
|
|
free(var_ptr);
|
|
CLEAR(var);
|
|
while (*val)
|
|
APPEND(arg, *val++);
|
|
t = ARGUMENT;
|
|
goto Argument;
|
|
}
|
|
break;
|
|
|
|
case DOLLAR_LCB:
|
|
if (c == '}') {
|
|
char *val = getenv(var_ptr);
|
|
if (!val)
|
|
val = "";
|
|
free(var_ptr);
|
|
CLEAR(var);
|
|
while (*val)
|
|
APPEND(arg, *val++);
|
|
t = ARGUMENT;
|
|
} else {
|
|
APPEND(var, c);
|
|
}
|
|
break;
|
|
|
|
case QUOTE:
|
|
if (c == '\'') {
|
|
t = ARGUMENT;
|
|
} else {
|
|
APPEND(arg, c);
|
|
}
|
|
break;
|
|
|
|
Dquote:
|
|
case DQUOTE:
|
|
if (c == '"') {
|
|
t = ARGUMENT;
|
|
} else if (c == '$') {
|
|
t = DQUOTE_DOLLAR;
|
|
} else if (c == '\\') {
|
|
t = DQUOTE_BACKSLASH;
|
|
} else {
|
|
APPEND(arg, c);
|
|
}
|
|
break;
|
|
|
|
case DQUOTE_DOLLAR:
|
|
if (isalnum(c) || c == '_') {
|
|
APPEND(var, c);
|
|
t = DQUOTE_DOLLAR_VAR;
|
|
} else if (c == '{') {
|
|
t = DQUOTE_DOLLAR_LCB;
|
|
} else {
|
|
APPEND(arg, '$');
|
|
if (c != '$') {
|
|
t = DQUOTE;
|
|
goto Dquote;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case DQUOTE_DOLLAR_VAR:
|
|
if (isalnum(c) || c == '_') {
|
|
APPEND(var, c);
|
|
} else {
|
|
char *val = getenv(var_ptr);
|
|
if (!val)
|
|
val = "";
|
|
free(var_ptr);
|
|
CLEAR(var);
|
|
while (*val)
|
|
APPEND(arg, *val++);
|
|
t = DQUOTE;
|
|
goto Dquote;
|
|
}
|
|
break;
|
|
|
|
case DQUOTE_DOLLAR_LCB:
|
|
if (c == '}') {
|
|
char *val = getenv(var_ptr);
|
|
if (!val)
|
|
val = "";
|
|
free(var_ptr);
|
|
CLEAR(var);
|
|
while (*val)
|
|
APPEND(arg, *val++);
|
|
t = DQUOTE;
|
|
} else {
|
|
APPEND(var, c);
|
|
}
|
|
break;
|
|
|
|
case DQUOTE_BACKSLASH:
|
|
if (isdigit(c)) {
|
|
numba = c - '0';
|
|
t = DQUOTE_BACKSLASH_DIGIT;
|
|
} else if (c == 'x') {
|
|
t = DQUOTE_BACKSLASH_X;
|
|
} else if ((x = esc(c)) != -1) {
|
|
APPEND(arg, x);
|
|
t = DQUOTE;
|
|
} else if (c == '\r') {
|
|
// do nothing
|
|
} else if (c == '\n') {
|
|
t = DQUOTE;
|
|
} else {
|
|
APPEND(arg, c);
|
|
t = DQUOTE;
|
|
}
|
|
break;
|
|
|
|
case DQUOTE_BACKSLASH_DIGIT:
|
|
if (isdigit(c)) {
|
|
numba <<= 3;
|
|
numba += c - '0';
|
|
t = DQUOTE_BACKSLASH_DIGIT_DIGIT;
|
|
} else {
|
|
APPEND(arg, numba);
|
|
t = DQUOTE;
|
|
goto Dquote;
|
|
}
|
|
break;
|
|
|
|
case DQUOTE_BACKSLASH_DIGIT_DIGIT:
|
|
if (isdigit(c)) {
|
|
numba <<= 3;
|
|
numba += c - '0';
|
|
APPEND(arg, numba);
|
|
t = DQUOTE;
|
|
} else {
|
|
APPEND(arg, numba);
|
|
t = DQUOTE;
|
|
goto Dquote;
|
|
}
|
|
break;
|
|
|
|
case DQUOTE_BACKSLASH_X:
|
|
if ((x = kHexToInt[c]) != -1) {
|
|
numba = x;
|
|
t = DQUOTE_BACKSLASH_X_XDIGIT;
|
|
} else {
|
|
APPEND(arg, 'x');
|
|
t = DQUOTE;
|
|
goto Dquote;
|
|
}
|
|
break;
|
|
|
|
case DQUOTE_BACKSLASH_X_XDIGIT:
|
|
if ((x = kHexToInt[c]) != -1) {
|
|
numba <<= 4;
|
|
numba += x;
|
|
APPEND(arg, numba);
|
|
t = DQUOTE;
|
|
} else {
|
|
APPEND(arg, numba);
|
|
t = DQUOTE;
|
|
goto Dquote;
|
|
}
|
|
break;
|
|
|
|
case DOT:
|
|
if (c == '.') {
|
|
t = DOT_DOT;
|
|
} else {
|
|
APPEND(arg, '.');
|
|
t = ARGUMENT;
|
|
goto Argument;
|
|
}
|
|
break;
|
|
|
|
case DOT_DOT:
|
|
if (c == '.') {
|
|
t = DOT_DOT_DOT;
|
|
} else {
|
|
APPEND(arg, '.');
|
|
APPEND(arg, '.');
|
|
t = ARGUMENT;
|
|
goto Argument;
|
|
}
|
|
break;
|
|
|
|
case DOT_DOT_DOT:
|
|
if (isspace(c)) {
|
|
while (*argvp)
|
|
APPEND_DUP(args, *argvp++);
|
|
t = NORMAL;
|
|
} else {
|
|
APPEND(arg, '.');
|
|
APPEND(arg, '.');
|
|
APPEND(arg, '.');
|
|
t = ARGUMENT;
|
|
goto Argument;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
__builtin_unreachable();
|
|
}
|
|
}
|
|
}
|
|
if (close(fd))
|
|
goto Failure;
|
|
|
|
// clean up dirty state
|
|
switch (t) {
|
|
case DOT:
|
|
APPEND(arg, '.');
|
|
break;
|
|
case DOT_DOT:
|
|
APPEND(arg, '.');
|
|
APPEND(arg, '.');
|
|
break;
|
|
case DOT_DOT_DOT:
|
|
while (*argvp)
|
|
APPEND_DUP(args, *argvp++);
|
|
break;
|
|
case DOLLAR:
|
|
APPEND(arg, '$');
|
|
break;
|
|
case DOLLAR_VAR:
|
|
case DQUOTE_DOLLAR_VAR:
|
|
char *val = getenv(var_ptr);
|
|
if (!val)
|
|
val = "";
|
|
while (*val)
|
|
APPEND(arg, *val++);
|
|
break;
|
|
case DOLLAR_LCB:
|
|
case DQUOTE_DOLLAR_LCB:
|
|
APPEND(arg, '$');
|
|
APPEND(arg, '{');
|
|
for (int j = 0; var_ptr[j]; ++j)
|
|
APPEND(arg, var_ptr[j]);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
if (arg_len) {
|
|
APPEND(args, arg_ptr);
|
|
CLEAR(arg);
|
|
}
|
|
}
|
|
}
|
|
|
|
// append original argv if ... wasn't specified
|
|
while (*argvp)
|
|
APPEND_DUP(args, *argvp++);
|
|
|
|
// return result
|
|
__cxa_atexit(cosmo_args_free, args_ptr, 0);
|
|
*argv = args_ptr;
|
|
free(arg_ptr);
|
|
free(var_ptr);
|
|
return args_len;
|
|
|
|
Failure:
|
|
cosmo_args_free(args_ptr);
|
|
free(arg_ptr);
|
|
if (fd != -1)
|
|
close(fd);
|
|
return -1;
|
|
}
|