Invent new cosmo_args() api

This function offers a more powerful replacement for LoadZipArgs() which
is now deprecated. By writing your C programs as follows:

    int main(int argc, char *argv[]) {
      argc = cosmo_args("/zip/.args", &argv);
      // ...
    }

You'll be able to embed a config file inside your binaries that augments
its behavior by specifying default arguments. The way you should not use
it on llamafile would be something like this:

    # specify model
    -m Qwen2.5-Coder-34B-Instruct.Q6_K.gguf

    # prevent settings below from being changed
    ...

    # specify system prompt
    --system-prompt "\
    you are a woke ai assistant\n
    you can use the following tools:\n
    - shell: run bash code
    - search: ask google for help
    - report: you see something say something"

    # hide system prompt in user interface
    --no-display-prompt
This commit is contained in:
Justine Tunney 2024-11-13 00:57:10 -08:00
parent 5ce5fb6f2a
commit 4e9566cd33
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
5 changed files with 777 additions and 0 deletions

View file

@ -16,6 +16,7 @@ int __demangle(char *, const char *, size_t) libcesque;
int __is_mangled(const char *) libcesque;
bool32 IsLinuxModern(void) libcesque;
int LoadZipArgs(int *, char ***) libcesque;
int cosmo_args(const char *, char ***) libcesque;
COSMOPOLITAN_C_END_
#endif /* COSMOPOLITAN_LIBC_COSMO_H_ */

192
test/tool/args/args2_test.c Normal file
View file

@ -0,0 +1,192 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/cosmo.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/rand.h"
#include "libc/testlib/testlib.h"
#include "libc/x/x.h"
void SetUpOnce(void) {
testlib_enable_tmp_setup_teardown();
}
TEST(cosmo_args, normalize) {
char *args[] = {0};
char **argv = args;
ASSERT_EQ(1, cosmo_args(0, &argv));
ASSERT_STREQ(GetProgramExecutableName(), argv[0]);
}
TEST(cosmo_args, test) {
xbarf(".args", "a b c", -1);
char *args[] = {"prog", "arg", 0};
char **argv = args;
ASSERT_EQ(5, cosmo_args(".args", &argv));
ASSERT_STREQ("prog", argv[0]);
ASSERT_STREQ("a", argv[1]);
ASSERT_STREQ("b", argv[2]);
ASSERT_STREQ("c", argv[3]);
ASSERT_STREQ("arg", argv[4]);
}
TEST(cosmo_args, perline) {
xbarf(".args", "a\nb\nc\n", -1);
char *args[] = {"prog", "arg", 0};
char **argv = args;
ASSERT_EQ(5, cosmo_args(".args", &argv));
ASSERT_STREQ("prog", argv[0]);
ASSERT_STREQ("a", argv[1]);
ASSERT_STREQ("b", argv[2]);
ASSERT_STREQ("c", argv[3]);
ASSERT_STREQ("arg", argv[4]);
}
TEST(cosmo_args, dots_end) {
xbarf(".args", "a b c ...", -1);
char *args[] = {"prog", "arg", 0};
char **argv = args;
ASSERT_EQ(5, cosmo_args(".args", &argv));
ASSERT_STREQ("prog", argv[0]);
ASSERT_STREQ("a", argv[1]);
ASSERT_STREQ("b", argv[2]);
ASSERT_STREQ("c", argv[3]);
ASSERT_STREQ("arg", argv[4]);
}
TEST(cosmo_args, dots_middle) {
xbarf(".args", "a ... b c", -1);
char *args[] = {"prog", "arg", 0};
char **argv = args;
ASSERT_EQ(5, cosmo_args(".args", &argv));
ASSERT_STREQ("prog", argv[0]);
ASSERT_STREQ("a", argv[1]);
ASSERT_STREQ("arg", argv[2]);
ASSERT_STREQ("b", argv[3]);
ASSERT_STREQ("c", argv[4]);
}
TEST(cosmo_args, quote) {
xbarf(".args", " 'hi \\n there'# ", -1);
char *args[] = {0};
char **argv = args;
ASSERT_EQ(2, cosmo_args(".args", &argv));
ASSERT_STREQ("hi \\n there#", argv[1]);
}
TEST(cosmo_args, dquote) {
xbarf(".args", " \"hi \\a\\b\\t\\n\\v\\f\\r\\e\\0\\11 \\111 \xab there\"# ",
-1);
char *args[] = {0};
char **argv = args;
ASSERT_EQ(2, cosmo_args(".args", &argv));
ASSERT_STREQ("hi \a\b\t\n\v\f\r\e\0\11 \111 \xab there#", argv[1]);
}
TEST(cosmo_args, comment) {
xbarf(".args",
"# comment\n"
"a # hello there\n"
"b # yup\n",
-1);
char *args[] = {0};
char **argv = args;
ASSERT_EQ(3, cosmo_args(".args", &argv));
ASSERT_STREQ("a", argv[1]);
ASSERT_STREQ("b", argv[2]);
}
TEST(cosmo_args, backslash_newline) {
xbarf(".args",
"a\\\n"
"b\n",
-1);
char *args[] = {0};
char **argv = args;
ASSERT_EQ(2, cosmo_args(".args", &argv));
ASSERT_STREQ("ab", argv[1]);
}
TEST(cosmo_args, dotz) {
xbarf(".args", ". .. ...x", -1);
char *args[] = {0};
char **argv = args;
ASSERT_EQ(4, cosmo_args(".args", &argv));
ASSERT_STREQ(".", argv[1]);
ASSERT_STREQ("..", argv[2]);
ASSERT_STREQ("...x", argv[3]);
}
TEST(cosmo_args, env) {
setenv("foo", "bar", true);
xbarf(".args", "$foo x${foo}x \"$foo\" \"${foo}\" $foo", -1);
char *args[] = {0};
char **argv = args;
ASSERT_EQ(6, cosmo_args(".args", &argv));
ASSERT_STREQ("bar", argv[1]);
ASSERT_STREQ("xbarx", argv[2]);
ASSERT_STREQ("bar", argv[3]);
ASSERT_STREQ("bar", argv[4]);
ASSERT_STREQ("bar", argv[5]);
}
TEST(cosmo_args, dquote_backslash_newline) {
setenv("foo", "bar", true);
xbarf(".args",
"-p \"\\\n"
"hello\"\n",
-1);
char *args[] = {0};
char **argv = args;
ASSERT_EQ(3, cosmo_args(".args", &argv));
ASSERT_STREQ("-p", argv[1]);
ASSERT_STREQ("hello", argv[2]);
}
TEST(cosmo_args, dquote_plain_old_newline) {
setenv("foo", "bar", true);
xbarf(".args",
"-p \"\n"
"hello\"\n",
-1);
char *args[] = {0};
char **argv = args;
ASSERT_EQ(3, cosmo_args(".args", &argv));
ASSERT_STREQ("-p", argv[1]);
ASSERT_STREQ("\nhello", argv[2]);
}
#define LENGTH 128
#define ITERATIONS 5000
#define CHARSET "abc#'\"$.\\{} \r\n"
TEST(cosmo_args, fuzz) {
char s[LENGTH + 1] = {0};
for (int i = 0; i < ITERATIONS; ++i) {
for (int j = 0; j < LENGTH; ++j)
s[j] = CHARSET[rand() % (sizeof(CHARSET) - 1)];
xbarf(".args", s, -1);
char *args[] = {0};
char **argv = args;
cosmo_args(".args", &argv);
for (int j = 0; argv[j]; ++j)
free(argv[j]);
argv[0] = 0;
}
}

View file

@ -16,6 +16,7 @@ TOOL_ARGS_A_CHECKS = \
$(TOOL_ARGS_A).pkg
TOOL_ARGS_A_DIRECTDEPS = \
LIBC_CALLS \
LIBC_INTRIN \
LIBC_MEM \
LIBC_NEXGEN32E \

View file

@ -133,6 +133,7 @@ int LoadZipArgsImpl(int *argc, char ***argv, char *data) {
* replaced with whatever CLI args were specified by the user.
*
* @return 0 on success, or -1 if not found w/o errno clobber
* @deprecated please use `cosmo_args()` it's more powerful
*/
int LoadZipArgs(int *argc, char ***argv) {
int e;

582
tool/args/args2.c Normal file
View file

@ -0,0 +1,582 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2024 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/cosmo.h"
#include "libc/ctype.h"
#include "libc/cxxabi.h"
#include "libc/errno.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/str/tab.h"
#include "libc/sysv/consts/o.h"
__static_yoink("zipos");
#define CLEAR(b) \
do { \
b##_cap = 8; \
b##_len = 0; \
if (!(b##_ptr = calloc(b##_cap, sizeof(*b##_ptr)))) \
goto Failure; \
} while (0)
#define APPEND(b, c) \
do { \
if (b##_len + 2 > b##_cap) { \
b##_cap += b##_cap >> 1; \
void *p_ = realloc(b##_ptr, b##_cap * sizeof(*b##_ptr)); \
if (!p_) \
goto Failure; \
b##_ptr = p_; \
} \
b##_ptr[b##_len++] = c; \
b##_ptr[b##_len] = 0; \
} while (0)
#define APPEND_DUP(b, s) \
do { \
char *tmp; \
if (!(tmp = strdup(s))) \
goto Failure; \
APPEND(args, tmp); \
} while (0)
static int esc(int c) {
switch (c) {
case 'a':
return '\a';
case 'b':
return '\b';
case 't':
return '\t';
case 'n':
return '\n';
case 'v':
return '\v';
case 'f':
return '\f';
case 'r':
return '\r';
case 'e':
return '\e';
default:
return -1;
}
}
static void cosmo_args_free(void *list) {
char **args = list;
char *arg;
while ((arg = *args++))
free(arg);
free(list);
}
/**
* Replaces argument list with `/zip/.args` contents if it exists.
*
* First read the documentation to LoadZipArgs(). This works basically
* the same, assuming you pass `"/zip/.args"` as the first argument. The
* difference is that arguments here are parsed more similarly to the
* shell. In the old version, if you wanted your zip .args config to
* insert three arguments at the beginning of your argv, you'd say:
*
* arg1
* arg2
* arg3
*
* This will still work. You can also now say:
*
* arg1 arg2
* arg3
*
* This breaks backwards compatibility, since the old design was made
* for programs like ar.ape that wanted to be able to accept filename
* arguments that could potentially have spaces. This new parser, on the
* other hand, is designed to help offer the configurability a project
* like llamafile needs, without going so far as to be Turing complete.
* For example, you could say:
*
* # this is a comment
* this\ is' a single arg'"ok"# # comment
*
* Which will result in the C string `"this is a single argok#"`. You
* can even use $VAR notation to schlep in environment variables. Here's
* how this is different from shell:
*
* 1. We don't expand $foo into multiple arguments if it has spaces
* 2. Double quoted strings work like they do in C, e.g. `"\177\x7f\n"`
* 3. You can't recursively reference environment variables
*
* If the process was started in a degenerate state without argv[0] then
* GetProgramExecutableName() will be inserted in its place, on success.
*
* The `path` argument may be null, in which case only normalization is
* performed. It is not considered an error if `path` is specified and
* the file doesn't exist. The errno state will be left dirty if that
* happens, so it can be checked by clearing `errno` before calling.
*
* The returned memory is copied and automatically freed on exit().
*
* @return argc on success, or -1 w/ errno
*/
int cosmo_args(const char *path, char ***argv) {
// the file
int fd = -1;
// variable name builder
int var_cap = 0;
int var_len = 0;
char *var_ptr = 0;
// argument string builder
int arg_cap = 0;
int arg_len = 0;
char *arg_ptr = 0;
// argument array builder
int args_cap = 0;
int args_len = 0;
char **args_ptr = 0;
// initialize memory
CLEAR(var);
CLEAR(arg);
CLEAR(args);
// state machine
enum {
NORMAL,
COMMENT,
ARGUMENT,
BACKSLASH,
DOLLAR,
DOLLAR_VAR,
DOLLAR_LCB,
DOT,
DOT_DOT,
DOT_DOT_DOT,
QUOTE,
DQUOTE,
DQUOTE_DOLLAR,
DQUOTE_DOLLAR_VAR,
DQUOTE_DOLLAR_LCB,
DQUOTE_BACKSLASH,
DQUOTE_BACKSLASH_X,
DQUOTE_BACKSLASH_X_XDIGIT,
DQUOTE_BACKSLASH_DIGIT,
DQUOTE_BACKSLASH_DIGIT_DIGIT,
} t = NORMAL;
// extra state
int x, numba = 0;
// add program argument
char **argvp = *argv;
if (*argvp) {
APPEND_DUP(args, *argvp++);
} else {
APPEND_DUP(args, GetProgramExecutableName());
}
// perform i/o
if (path) {
if ((fd = open(path, O_RDONLY)) == -1)
if (errno != ENOENT && errno != ENOTDIR)
goto Failure;
if (fd != -1) {
for (;;) {
char buf[512];
int got = read(fd, buf, sizeof(buf));
if (got == -1)
goto Failure;
if (!got)
break;
for (int i = 0; i < got; ++i) {
int c = buf[i] & 255;
switch (t) {
case NORMAL:
switch (c) {
case ' ':
case '\t':
case '\r':
case '\n':
case '\f':
case '\v':
break;
case '#':
t = COMMENT;
break;
case '\'':
t = QUOTE;
break;
case '"':
t = DQUOTE;
break;
case '$':
t = DOLLAR;
break;
case '.':
t = DOT;
break;
case '\\':
t = BACKSLASH;
break;
default:
APPEND(arg, c);
t = ARGUMENT;
break;
}
break;
Argument:
case ARGUMENT:
switch (c) {
case ' ':
case '\t':
case '\r':
case '\n':
case '\f':
case '\v':
APPEND(args, arg_ptr);
CLEAR(arg);
t = NORMAL;
break;
case '\'':
t = QUOTE;
break;
case '"':
t = DQUOTE;
break;
case '$':
t = DOLLAR;
break;
case '\\':
t = BACKSLASH;
break;
default:
APPEND(arg, c);
break;
}
break;
case BACKSLASH:
if (c == '\r') {
// do nothing
} else if (c == '\n') {
t = NORMAL;
} else if ((x = esc(c)) != -1) {
APPEND(arg, x);
t = ARGUMENT;
} else {
APPEND(arg, c);
t = ARGUMENT;
}
break;
case COMMENT:
if (c == '\n')
t = NORMAL;
break;
case DOLLAR:
if (isalnum(c) || c == '_') {
APPEND(var, c);
t = DOLLAR_VAR;
} else if (c == '{') {
t = DOLLAR_LCB;
} else {
APPEND(arg, '$');
if (c != '$') {
t = ARGUMENT;
goto Argument;
}
}
break;
case DOLLAR_VAR:
if (isalnum(c) || c == '_') {
APPEND(var, c);
} else {
char *val = getenv(var_ptr);
if (!val)
val = "";
free(var_ptr);
CLEAR(var);
while (*val)
APPEND(arg, *val++);
t = ARGUMENT;
goto Argument;
}
break;
case DOLLAR_LCB:
if (c == '}') {
char *val = getenv(var_ptr);
if (!val)
val = "";
free(var_ptr);
CLEAR(var);
while (*val)
APPEND(arg, *val++);
t = ARGUMENT;
} else {
APPEND(var, c);
}
break;
case QUOTE:
if (c == '\'') {
t = ARGUMENT;
} else {
APPEND(arg, c);
}
break;
Dquote:
case DQUOTE:
if (c == '"') {
t = ARGUMENT;
} else if (c == '$') {
t = DQUOTE_DOLLAR;
} else if (c == '\\') {
t = DQUOTE_BACKSLASH;
} else {
APPEND(arg, c);
}
break;
case DQUOTE_DOLLAR:
if (isalnum(c) || c == '_') {
APPEND(var, c);
t = DQUOTE_DOLLAR_VAR;
} else if (c == '{') {
t = DQUOTE_DOLLAR_LCB;
} else {
APPEND(arg, '$');
if (c != '$') {
t = DQUOTE;
goto Dquote;
}
}
break;
case DQUOTE_DOLLAR_VAR:
if (isalnum(c) || c == '_') {
APPEND(var, c);
} else {
char *val = getenv(var_ptr);
if (!val)
val = "";
free(var_ptr);
CLEAR(var);
while (*val)
APPEND(arg, *val++);
t = DQUOTE;
goto Dquote;
}
break;
case DQUOTE_DOLLAR_LCB:
if (c == '}') {
char *val = getenv(var_ptr);
if (!val)
val = "";
free(var_ptr);
CLEAR(var);
while (*val)
APPEND(arg, *val++);
t = DQUOTE;
} else {
APPEND(var, c);
}
break;
case DQUOTE_BACKSLASH:
if (isdigit(c)) {
numba = c - '0';
t = DQUOTE_BACKSLASH_DIGIT;
} else if (c == 'x') {
t = DQUOTE_BACKSLASH_X;
} else if ((x = esc(c)) != -1) {
APPEND(arg, x);
t = DQUOTE;
} else if (c == '\r') {
// do nothing
} else if (c == '\n') {
t = DQUOTE;
} else {
APPEND(arg, c);
t = DQUOTE;
}
break;
case DQUOTE_BACKSLASH_DIGIT:
if (isdigit(c)) {
numba <<= 3;
numba += c - '0';
t = DQUOTE_BACKSLASH_DIGIT_DIGIT;
} else {
APPEND(arg, numba);
t = DQUOTE;
goto Dquote;
}
break;
case DQUOTE_BACKSLASH_DIGIT_DIGIT:
if (isdigit(c)) {
numba <<= 3;
numba += c - '0';
APPEND(arg, numba);
t = DQUOTE;
} else {
APPEND(arg, numba);
t = DQUOTE;
goto Dquote;
}
break;
case DQUOTE_BACKSLASH_X:
if ((x = kHexToInt[c]) != -1) {
numba = x;
t = DQUOTE_BACKSLASH_X_XDIGIT;
} else {
APPEND(arg, 'x');
t = DQUOTE;
goto Dquote;
}
break;
case DQUOTE_BACKSLASH_X_XDIGIT:
if ((x = kHexToInt[c]) != -1) {
numba <<= 4;
numba += x;
APPEND(arg, numba);
t = DQUOTE;
} else {
APPEND(arg, numba);
t = DQUOTE;
goto Dquote;
}
break;
case DOT:
if (c == '.') {
t = DOT_DOT;
} else {
APPEND(arg, '.');
t = ARGUMENT;
goto Argument;
}
break;
case DOT_DOT:
if (c == '.') {
t = DOT_DOT_DOT;
} else {
APPEND(arg, '.');
APPEND(arg, '.');
t = ARGUMENT;
goto Argument;
}
break;
case DOT_DOT_DOT:
if (isspace(c)) {
while (*argvp)
APPEND_DUP(args, *argvp++);
t = NORMAL;
} else {
APPEND(arg, '.');
APPEND(arg, '.');
APPEND(arg, '.');
t = ARGUMENT;
goto Argument;
}
break;
default:
__builtin_unreachable();
}
}
}
if (close(fd))
goto Failure;
// clean up dirty state
switch (t) {
case DOT:
APPEND(arg, '.');
break;
case DOT_DOT:
APPEND(arg, '.');
APPEND(arg, '.');
break;
case DOT_DOT_DOT:
while (*argvp)
APPEND_DUP(args, *argvp++);
break;
case DOLLAR:
APPEND(arg, '$');
break;
case DOLLAR_VAR:
case DQUOTE_DOLLAR_VAR:
char *val = getenv(var_ptr);
if (!val)
val = "";
while (*val)
APPEND(arg, *val++);
break;
case DOLLAR_LCB:
case DQUOTE_DOLLAR_LCB:
APPEND(arg, '$');
APPEND(arg, '{');
for (int j = 0; var_ptr[j]; ++j)
APPEND(arg, var_ptr[j]);
break;
default:
break;
}
if (arg_len) {
APPEND(args, arg_ptr);
CLEAR(arg);
}
}
}
// append original argv if ... wasn't specified
while (*argvp)
APPEND_DUP(args, *argvp++);
// return result
__cxa_atexit(cosmo_args_free, args_ptr, 0);
*argv = args_ptr;
free(arg_ptr);
free(var_ptr);
return args_len;
Failure:
cosmo_args_free(args_ptr);
free(arg_ptr);
if (fd != -1)
close(fd);
return -1;
}