Make fixes, improvements, and chibicc python bindings

- python now mixes audio 10x faster
- python octal notation is restored
- chibicc now builds code 3x faster
- chibicc now has help documentation
- chibicc can now generate basic python bindings
- linenoise now supports some paredit-like features

See #141
This commit is contained in:
Justine Tunney 2021-10-08 08:11:51 -07:00
parent 28997f3acb
commit 7061c79c22
121 changed files with 5272 additions and 1928 deletions

View file

@ -22,7 +22,11 @@
// standard's wording:
// https://github.com/rui314/chibicc/wiki/cpp.algo.pdf
#include "libc/log/libfatal.internal.h"
#include "libc/mem/arena.h"
#include "libc/stdio/stdio.h"
#include "third_party/chibicc/chibicc.h"
#include "third_party/chibicc/kw.h"
typedef struct CondIncl CondIncl;
typedef struct Hideset Hideset;
@ -65,8 +69,8 @@ static int include_next_idx;
static Token *preprocess2(Token *);
static Macro *find_macro(Token *);
static bool is_hash(Token *tok) {
return tok->at_bol && EQUAL(tok, "#");
static inline bool is_hash(Token *tok) {
return tok->at_bol && tok->len == 1 && tok->loc[0] == '#';
}
// Some preprocessor directives such as #include allow extraneous
@ -151,13 +155,17 @@ static Token *append(Token *tok1, Token *tok2) {
}
static Token *skip_cond_incl2(Token *tok) {
unsigned char kw;
while (tok->kind != TK_EOF) {
if (is_hash(tok) && (EQUAL(tok->next, "if") || EQUAL(tok->next, "ifdef") ||
EQUAL(tok->next, "ifndef"))) {
tok = skip_cond_incl2(tok->next->next);
continue;
if (is_hash(tok) && (kw = GetKw(tok->next->loc, tok->next->len))) {
if (kw == KW_IF || kw == KW_IFDEF || kw == KW_IFNDEF) {
tok = skip_cond_incl2(tok->next->next);
continue;
}
if (kw == KW_ENDIF) {
return tok->next->next;
}
}
if (is_hash(tok) && EQUAL(tok->next, "endif")) return tok->next->next;
tok = tok->next;
}
return tok;
@ -166,15 +174,17 @@ static Token *skip_cond_incl2(Token *tok) {
// Skip until next `#else`, `#elif` or `#endif`.
// Nested `#if` and `#endif` are skipped.
static Token *skip_cond_incl(Token *tok) {
unsigned char kw;
while (tok->kind != TK_EOF) {
if (is_hash(tok) && (EQUAL(tok->next, "if") || EQUAL(tok->next, "ifdef") ||
EQUAL(tok->next, "ifndef"))) {
tok = skip_cond_incl2(tok->next->next);
continue;
if (is_hash(tok) && (kw = GetKw(tok->next->loc, tok->next->len))) {
if (kw == KW_IF || kw == KW_IFDEF || kw == KW_IFNDEF) {
tok = skip_cond_incl2(tok->next->next);
continue;
}
if (kw == KW_ELIF || kw == KW_ELSE || kw == KW_ENDIF) {
break;
}
}
if (is_hash(tok) && (EQUAL(tok->next, "elif") || EQUAL(tok->next, "else") ||
EQUAL(tok->next, "endif")))
break;
tok = tok->next;
}
return tok;
@ -210,14 +220,20 @@ static Token *new_str_token(char *str, Token *tmpl) {
static Token *copy_line(Token **rest, Token *tok) {
Token head = {};
Token *cur = &head;
for (; !tok->at_bol; tok = tok->next) cur = cur->next = copy_token(tok);
for (; !tok->at_bol; tok = tok->next) {
cur = cur->next = copy_token(tok);
}
cur->next = new_eof(tok);
*rest = tok;
return head.next;
}
static Token *new_num_token(int val, Token *tmpl) {
char *buf = xasprintf("%d\n", val);
char *p, *buf;
p = buf = malloc(13);
p = FormatInt32(p, val);
p[0] = '\n';
p[1] = 0;
return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, buf));
}
@ -248,6 +264,7 @@ static Token *read_const_expr(Token **rest, Token *tok) {
// Read and evaluate a constant expression.
static long eval_const_expr(Token **rest, Token *tok) {
__arena_push();
Token *start = tok;
Token *expr = read_const_expr(rest, tok->next);
expr = preprocess2(expr);
@ -268,6 +285,7 @@ static long eval_const_expr(Token **rest, Token *tok) {
Token *rest2;
long val = const_expr(&rest2, expr);
if (rest2->kind != TK_EOF) error_tok(rest2, "extra token");
__arena_pop();
return val;
}
@ -327,7 +345,7 @@ static Macro *read_macro_definition(Token **rest, Token *tok) {
if (tok->kind != TK_IDENT) error_tok(tok, "macro name must be an identifier");
name = strndup(tok->loc, tok->len);
tok = tok->next;
if (!tok->has_space && EQUAL(tok, "(")) {
if (!tok->has_space && tok->len == 1 && tok->loc[0] == '(') {
// Function-like macro
char *va_args_name = NULL;
MacroParam *params = read_macro_params(&tok, tok->next, &va_args_name);
@ -346,13 +364,18 @@ static MacroArg *read_macro_arg_one(Token **rest, Token *tok, bool read_rest) {
Token *cur = &head;
int level = 0;
for (;;) {
if (level == 0 && EQUAL(tok, ")")) break;
if (level == 0 && !read_rest && EQUAL(tok, ",")) break;
if (level == 0 && tok->len == 1 && tok->loc[0] == ')') {
break;
}
if (level == 0 && !read_rest && tok->len == 1 && tok->loc[0] == ',') {
break;
}
if (tok->kind == TK_EOF) error_tok(tok, "premature end of input");
if (EQUAL(tok, "("))
if (tok->len == 1 && tok->loc[0] == '(') {
level++;
else if (EQUAL(tok, ")"))
} else if (tok->len == 1 && tok->loc[0] == ')') {
level--;
}
cur = cur->next = copy_token(tok);
tok = tok->next;
}
@ -377,7 +400,7 @@ static MacroArg *read_macro_args(Token **rest, Token *tok, MacroParam *params,
}
if (va_args_name) {
MacroArg *arg;
if (EQUAL(tok, ")")) {
if (tok->len == 1 && tok->loc[0] == ')') {
arg = calloc(1, sizeof(MacroArg));
arg->tok = new_eof(tok);
} else {
@ -461,10 +484,11 @@ static Token *subst(Token *tok, MacroArg *args) {
Token *cur = &head;
while (tok->kind != TK_EOF) {
// "#" followed by a parameter is replaced with stringized actuals.
if (EQUAL(tok, "#")) {
if (tok->len == 1 && tok->loc[0] == '#') {
MacroArg *arg = find_arg(args, tok->next);
if (!arg)
if (!arg) {
error_tok(tok->next, "'#' is not followed by a macro parameter");
}
cur = cur->next = stringize(tok, arg->tok);
tok = tok->next->next;
continue;
@ -472,7 +496,9 @@ static Token *subst(Token *tok, MacroArg *args) {
// [GNU] If __VA_ARG__ is empty, `,##__VA_ARGS__` is expanded
// to the empty token list. Otherwise, its expaned to `,` and
// __VA_ARGS__.
if (EQUAL(tok, ",") && EQUAL(tok->next, "##")) {
if (tok->len == 1 && tok->loc[0] == ',' &&
(tok->next->len == 2 &&
(tok->next->loc[0] == '#' && tok->next->loc[1] == '#'))) {
MacroArg *arg = find_arg(args, tok->next->next);
if (arg && arg->is_va_args) {
if (arg->tok->kind == TK_EOF) {
@ -484,7 +510,7 @@ static Token *subst(Token *tok, MacroArg *args) {
continue;
}
}
if (EQUAL(tok, "##")) {
if (tok->len == 2 && tok->loc[0] == '#' && tok->loc[1] == '#') {
if (cur == &head)
error_tok(tok, "'##' cannot appear at start of macro expansion");
if (tok->next->kind == TK_EOF)
@ -504,7 +530,8 @@ static Token *subst(Token *tok, MacroArg *args) {
continue;
}
MacroArg *arg = find_arg(args, tok);
if (arg && EQUAL(tok->next, "##")) {
if (arg && (tok->next->len == 2 &&
(tok->next->loc[0] == '#' && tok->next->loc[1] == '#'))) {
Token *rhs = tok->next->next;
if (arg->tok->kind == TK_EOF) {
MacroArg *arg2 = find_arg(args, rhs);
@ -577,7 +604,7 @@ static bool expand_macro(Token **rest, Token *tok) {
}
// If a funclike macro token is not followed by an argument list,
// treat it as a normal identifier.
if (!EQUAL(tok->next, "(")) return false;
if (!(tok->next->len == 1 && tok->next->loc[0] == '(')) return false;
// Function-like macro application
Token *macro_token = tok;
MacroArg *args = read_macro_args(&tok, tok, m->params, m->va_args_name);
@ -709,6 +736,7 @@ static Token *include_file(Token *tok, char *path, Token *filename_tok) {
// Read #line arguments
static void read_line_marker(Token **rest, Token *tok) {
// TODO: This is broken if file is different? See gperf codegen.
Token *start = tok;
tok = preprocess(copy_line(rest, tok));
if (tok->kind != TK_NUM || tok->ty->kind != TY_INT)
@ -723,6 +751,7 @@ static void read_line_marker(Token **rest, Token *tok) {
// Visit all tokens in `tok` while evaluating preprocessing
// macros and directives.
static Token *preprocess2(Token *tok) {
unsigned char kw;
Token head = {};
Token *cur = &head;
while (tok->kind != TK_EOF) {
@ -744,104 +773,111 @@ static Token *preprocess2(Token *tok) {
}
Token *start = tok;
tok = tok->next;
if (EQUAL(tok, "include")) {
bool is_dquote;
char *filename = read_include_filename(&tok, tok->next, &is_dquote);
if (filename[0] != '/' && is_dquote) {
char *path =
xasprintf("%s/%s", dirname(strdup(start->file->name)), filename);
if (fileexists(path)) {
tok = include_file(tok, path, start->next->next);
continue;
if ((kw = GetKw(tok->loc, tok->len))) {
if (kw == KW_INCLUDE) {
bool is_dquote;
char *filename = read_include_filename(&tok, tok->next, &is_dquote);
if (filename[0] != '/' && is_dquote) {
char *tmp = strdup(start->file->name);
char *path = xasprintf("%s/%s", dirname(tmp), filename);
free(tmp);
bool exists = fileexists(path);
free(path);
if (exists) {
tok = include_file(tok, path, start->next->next);
continue;
}
}
char *path = search_include_paths(filename);
tok = include_file(tok, path ? path : filename, start->next->next);
continue;
}
if (kw == KW_INCLUDE_NEXT) {
bool ignore;
char *filename = read_include_filename(&tok, tok->next, &ignore);
char *path = search_include_next(filename);
tok = include_file(tok, path ? path : filename, start->next->next);
continue;
}
if (kw == KW_DEFINE) {
read_macro_definition(&tok, tok->next);
continue;
}
if (kw == KW_UNDEF) {
tok = tok->next;
if (tok->kind != TK_IDENT)
error_tok(tok, "macro name must be an identifier");
undef_macro(strndup(tok->loc, tok->len));
tok = skip_line(tok->next);
continue;
}
if (kw == KW_IF) {
long val = eval_const_expr(&tok, tok);
push_cond_incl(start, val);
if (!val) tok = skip_cond_incl(tok);
continue;
}
if (kw == KW_IFDEF) {
bool defined = find_macro(tok->next);
push_cond_incl(tok, defined);
tok = skip_line(tok->next->next);
if (!defined) tok = skip_cond_incl(tok);
continue;
}
if (kw == KW_IFNDEF) {
bool defined = find_macro(tok->next);
push_cond_incl(tok, !defined);
tok = skip_line(tok->next->next);
if (defined) tok = skip_cond_incl(tok);
continue;
}
if (kw == KW_ELIF) {
if (!cond_incl || cond_incl->ctx == IN_ELSE)
error_tok(start, "stray #elif");
cond_incl->ctx = IN_ELIF;
if (!cond_incl->included && eval_const_expr(&tok, tok))
cond_incl->included = true;
else
tok = skip_cond_incl(tok);
continue;
}
if (kw == KW_ELSE) {
if (!cond_incl || cond_incl->ctx == IN_ELSE)
error_tok(start, "stray #else");
cond_incl->ctx = IN_ELSE;
tok = skip_line(tok->next);
if (cond_incl->included) tok = skip_cond_incl(tok);
continue;
}
if (kw == KW_ENDIF) {
if (!cond_incl) error_tok(start, "stray #endif");
cond_incl = cond_incl->next;
tok = skip_line(tok->next);
continue;
}
if (kw == KW_LINE) {
read_line_marker(&tok, tok->next);
continue;
}
char *path = search_include_paths(filename);
tok = include_file(tok, path ? path : filename, start->next->next);
continue;
}
if (EQUAL(tok, "include_next")) {
bool ignore;
char *filename = read_include_filename(&tok, tok->next, &ignore);
char *path = search_include_next(filename);
tok = include_file(tok, path ? path : filename, start->next->next);
continue;
}
if (EQUAL(tok, "define")) {
read_macro_definition(&tok, tok->next);
continue;
}
if (EQUAL(tok, "undef")) {
tok = tok->next;
if (tok->kind != TK_IDENT)
error_tok(tok, "macro name must be an identifier");
undef_macro(strndup(tok->loc, tok->len));
tok = skip_line(tok->next);
continue;
}
if (EQUAL(tok, "if")) {
long val = eval_const_expr(&tok, tok);
push_cond_incl(start, val);
if (!val) tok = skip_cond_incl(tok);
continue;
}
if (EQUAL(tok, "ifdef")) {
bool defined = find_macro(tok->next);
push_cond_incl(tok, defined);
tok = skip_line(tok->next->next);
if (!defined) tok = skip_cond_incl(tok);
continue;
}
if (EQUAL(tok, "ifndef")) {
bool defined = find_macro(tok->next);
push_cond_incl(tok, !defined);
tok = skip_line(tok->next->next);
if (defined) tok = skip_cond_incl(tok);
continue;
}
if (EQUAL(tok, "elif")) {
if (!cond_incl || cond_incl->ctx == IN_ELSE)
error_tok(start, "stray #elif");
cond_incl->ctx = IN_ELIF;
if (!cond_incl->included && eval_const_expr(&tok, tok))
cond_incl->included = true;
else
tok = skip_cond_incl(tok);
continue;
}
if (EQUAL(tok, "else")) {
if (!cond_incl || cond_incl->ctx == IN_ELSE)
error_tok(start, "stray #else");
cond_incl->ctx = IN_ELSE;
tok = skip_line(tok->next);
if (cond_incl->included) tok = skip_cond_incl(tok);
continue;
}
if (EQUAL(tok, "endif")) {
if (!cond_incl) error_tok(start, "stray #endif");
cond_incl = cond_incl->next;
tok = skip_line(tok->next);
continue;
}
if (EQUAL(tok, "line")) {
read_line_marker(&tok, tok->next);
continue;
}
if (tok->kind == TK_PP_NUM) {
read_line_marker(&tok, tok);
continue;
}
if (EQUAL(tok, "pragma") && EQUAL(tok->next, "once")) {
if (kw == KW_PRAGMA && EQUAL(tok->next, "once")) {
hashmap_put(&pragma_once, tok->file->name, (void *)1);
tok = skip_line(tok->next->next);
continue;
}
if (EQUAL(tok, "pragma")) {
if (kw == KW_PRAGMA) {
do {
tok = tok->next;
} while (!tok->at_bol);
continue;
}
if (EQUAL(tok, "error")) error_tok(tok, "error");
if (kw == KW_ERROR) {
error_tok(tok, "error");
}
// `#`-only line is legal. It's called a null directive.
if (tok->at_bol) continue;
error_tok(tok, "invalid preprocessor directive");
@ -901,11 +937,7 @@ static Token *base_file_macro(Token *tmpl) {
// __DATE__ is expanded to the current date, e.g. "May 17 2020".
static char *format_date(struct tm *tm) {
_Alignas(char) static char mon[][4] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
};
return xasprintf("\"%s %2d %d\"", mon[tm->tm_mon], tm->tm_mday,
return xasprintf("\"%s %2d %d\"", kMonthNameShort[tm->tm_mon], tm->tm_mday,
tm->tm_year + 1900);
}
@ -914,6 +946,15 @@ static char *format_time(struct tm *tm) {
return xasprintf("\"%02d:%02d:%02d\"", tm->tm_hour, tm->tm_min, tm->tm_sec);
}
void init_macros_conditional(void) {
if (opt_pg) define_macro("__PG__", "1");
if (opt_pic) define_macro("__PIC__", "1");
if (opt_sse3) define_macro("__SSE3__", "1");
if (opt_sse4) define_macro("__SSE4__", "1");
if (opt_popcnt) define_macro("__POPCNT__", "1");
if (opt_fentry) define_macro("__MFENTRY__", "1");
}
void init_macros(void) {
char *val, *name = "\
__chibicc__\000\
@ -1232,9 +1273,6 @@ __SSE2_MATH__\000\
define_macro(name, val);
name = val + strlen(val) + 1;
} while (*name);
#ifdef __SSE3__
define_macro("__SSE3__", "1");
#endif
add_builtin("__FILE__", file_macro);
add_builtin("__LINE__", line_macro);
add_builtin("__COUNTER__", counter_macro);
@ -1306,8 +1344,10 @@ static void join_adjacent_string_literals(Token *tok) {
len = len + t->ty->array_len - 1;
}
char *buf = calloc(tok1->ty->base->size, len);
int j = 0;
int i = 0;
for (Token *t = tok1; t != tok2; t = t->next) {
++j;
memcpy(buf + i, t->str, t->ty->size);
i = i + t->ty->size - t->ty->base->size;
}