cosmopolitan/third_party/chibicc/preprocess.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1415 lines
38 KiB
C
Raw Permalink Normal View History

2020-12-01 11:43:40 +00:00
// This file implements the C preprocessor.
//
// The preprocessor takes a list of tokens as an input and returns a
// new list of tokens as an output.
//
// The preprocessing language is designed in such a way that that's
// guaranteed to stop even if there is a recursive macro.
// Informally speaking, a macro is applied only once for each token.
// That is, if a macro token T appears in a result of direct or
// indirect macro expansion of T, T won't be expanded any further.
// For example, if T is defined as U, and U is defined as T, then
// token T is expanded to U and then to T and the macro expansion
// stops at that point.
//
// To achieve the above behavior, we attach for each token a set of
// macro names from which the token is expanded. The set is called
// "hideset". Hideset is initially empty, and every time we expand a
// macro, the macro name is added to the resulting tokens' hidesets.
//
// The above macro expansion algorithm is explained in this document
// written by Dave Prossor, which is used as a basis for the
// standard's wording:
// https://github.com/rui314/chibicc/wiki/cpp.algo.pdf
2023-06-18 12:39:31 +00:00
#include "libc/fmt/libgen.h"
#include "libc/log/libfatal.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/x/xasprintf.h"
2020-12-01 11:43:40 +00:00
#include "third_party/chibicc/chibicc.h"
#include "third_party/chibicc/kw.h"
2020-12-01 11:43:40 +00:00
// Short type names for the structures defined below in this file.
typedef struct CondIncl CondIncl;
typedef struct Hideset Hideset;
typedef struct MacroArg MacroArg;
// Kinds of string literal prefix. NOTE(review): not referenced in the
// part of the file visible here; presumably used by string literal
// concatenation further down -- confirm.
typedef enum {
STR_NONE,
STR_UTF8,
STR_UTF16,
STR_UTF32,
STR_WIDE,
} StringKind;
2020-12-01 11:43:40 +00:00
// One actual argument of a function-like macro invocation.
struct MacroArg {
MacroArg *next;   // next argument in the singly linked list
char *name;       // name of the parameter this argument is bound to
bool is_va_args;  // true if this argument holds the variadic rest
Token *tok;       // argument tokens, terminated by a TK_EOF token
};
// `#if` can be nested, so we use a stack to manage nested `#if`s.
struct CondIncl {
CondIncl *next;  // enclosing conditional (next entry down the stack)
enum { IN_THEN, IN_ELIF, IN_ELSE } ctx;  // which clause we are currently in
Token *tok;      // token recorded when the conditional was pushed
bool included;   // true once some clause of this conditional was taken
};
// A singly linked set of macro names that must not be expanded again
// for the token the set is attached to.
struct Hideset {
Hideset *next;  // next element of the set
char *name;     // macro name
};
2020-12-26 10:09:07 +00:00
// Table of all currently defined macros, keyed by macro name.
HashMap macros;
2020-12-01 11:43:40 +00:00
// Top of the stack of currently open conditional inclusions.
static CondIncl *cond_incl;
// Names of files in which `#pragma once` has been seen.
static HashMap pragma_once;
// Index into include_paths at which `#include_next` resumes searching.
static int include_next_idx;
// Forward declarations for functions used before their definitions.
static Token *preprocess2(Token *);
static Macro *find_macro(Token *);
2020-12-01 11:43:40 +00:00
// Returns true if `tok` is a single `#` character that is the first
// token on its line, i.e. the introducer of a directive.
static inline bool is_hash(Token *tok) {
  if (!tok->at_bol) return false;
  if (tok->len != 1) return false;
  return tok->loc[0] == '#';
}
// Some preprocessor directives such as #include allow extraneous
// tokens before newline. This function skips such tokens.
//
// Returns the first token of the following line. If `tok` already
// starts a line it is returned unchanged; otherwise a single warning
// is reported at `tok` and the rest of the line is discarded.
static Token *skip_line(Token *tok) {
  if (tok->at_bol) return tok;
  warn_tok(tok, "extra token");
  // Advance until a token that begins a new line. The TK_EOF test
  // keeps us from walking off the end of a list whose terminator is
  // not flagged as beginning a line.
  while (!tok->at_bol && tok->kind != TK_EOF) tok = tok->next;
  return tok;
}
// Returns a newly allocated shallow copy of `tok` whose `next` link is
// cleared, so the copy can be spliced into another list.
static Token *copy_token(Token *tok) {
  Token *dup = alloc_token();
  memcpy(dup, tok, sizeof(*dup));
  dup->next = NULL;
  return dup;
}
// Creates a zero-length TK_EOF token that otherwise carries the same
// fields (location, file, flags) as `tok`.
static Token *new_eof(Token *tok) {
  Token *eof = copy_token(tok);
  eof->len = 0;
  eof->kind = TK_EOF;
  return eof;
}
// Allocates a one-element hideset holding `name`. The string is not
// copied; the node keeps the pointer it was given.
static Hideset *new_hideset(char *name) {
  Hideset *node = calloc(1, sizeof(*node));
  node->name = name;
  return node;
}
// Returns a list made of fresh copies of every node of `hs1` followed
// by `hs2` itself (which is shared, not copied).
static Hideset *hideset_union(Hideset *hs1, Hideset *hs2) {
  Hideset *result = NULL;
  Hideset **tail = &result;
  while (hs1) {
    *tail = new_hideset(hs1->name);
    tail = &(*tail)->next;
    hs1 = hs1->next;
  }
  *tail = hs2;
  return result;
}
// Returns true if the hideset `hs` has an entry whose name is exactly
// the `len` bytes starting at `s`.
static bool hideset_contains(Hideset *hs, char *s, int len) {
  while (hs) {
    if (strlen(hs->name) == len && strncmp(hs->name, s, len) == 0) {
      return true;
    }
    hs = hs->next;
  }
  return false;
}
// Returns a newly allocated list of the names of `hs1` that also
// appear in `hs2`, in the order they occur in `hs1`.
static Hideset *hideset_intersection(Hideset *hs1, Hideset *hs2) {
  Hideset *result = NULL;
  Hideset **tail = &result;
  while (hs1) {
    if (hideset_contains(hs2, hs1->name, strlen(hs1->name))) {
      *tail = new_hideset(hs1->name);
      tail = &(*tail)->next;
    }
    hs1 = hs1->next;
  }
  return result;
}
// Copies the whole token list `tok` (until its NULL link) and adds
// `hs` to the hideset of every copy. The input list is left untouched.
static Token *add_hideset(Token *tok, Hideset *hs) {
  Token *result = NULL;
  Token **tail = &result;
  for (Token *src = tok; src; src = src->next) {
    Token *dup = copy_token(src);
    dup->hideset = hideset_union(dup->hideset, hs);
    *tail = dup;
    tail = &dup->next;
  }
  return result;
}
// Append tok2 to the end of tok1.
//
// The tokens of tok1 before its TK_EOF are copied, so tok1 itself is
// not modified; tok2 is linked in as-is. If tok1 is empty, tok2 is
// returned directly.
static Token *append(Token *tok1, Token *tok2) {
  Token *result = tok2;
  Token **tail = &result;
  while (tok1->kind != TK_EOF) {
    Token *dup = copy_token(tok1);
    *tail = dup;
    tail = &dup->next;
    tok1 = tok1->next;
  }
  *tail = tok2;
  return result;
}
// Skips the remainder of a nested conditional block, including any
// conditionals nested inside it. Returns the token after the name of
// the matching `#endif` directive, or the TK_EOF token if the input
// ends first.
static Token *skip_cond_incl2(Token *tok) {
  for (;;) {
    if (tok->kind == TK_EOF) return tok;
    unsigned char kw = 0;
    if (is_hash(tok)) kw = GetKw(tok->next->loc, tok->next->len);
    if (kw) {
      if (kw == KW_IF || kw == KW_IFDEF || kw == KW_IFNDEF) {
        // A deeper conditional: skip it entirely, then resume.
        tok = skip_cond_incl2(tok->next->next);
        continue;
      }
      if (kw == KW_ENDIF) return tok->next->next;
    }
    tok = tok->next;
  }
}
// Skip until next `#else`, `#elif` or `#endif`.
// Nested `#if` and `#endif` are skipped.
//
// The returned token is the `#` of the directive that ended the skip,
// or the TK_EOF token if no such directive was found.
static Token *skip_cond_incl(Token *tok) {
  for (;;) {
    if (tok->kind == TK_EOF) return tok;
    unsigned char kw = 0;
    if (is_hash(tok)) kw = GetKw(tok->next->loc, tok->next->len);
    if (kw) {
      if (kw == KW_IF || kw == KW_IFDEF || kw == KW_IFNDEF) {
        tok = skip_cond_incl2(tok->next->next);
        continue;
      }
      if (kw == KW_ELIF || kw == KW_ELSE || kw == KW_ENDIF) return tok;
    }
    tok = tok->next;
  }
}
// Double-quote a given string and returns it.
//
// Backslashes and double quotes in `str` are escaped with a backslash.
// The result is a newly allocated NUL-terminated string.
static char *quote_string(char *str) {
  // Two quotes and the terminator, one byte per character, and one
  // more for every character that needs an escaping backslash.
  int size = 3;
  for (char *s = str; *s; s++) {
    size += (*s == '\\' || *s == '"') ? 2 : 1;
  }
  char *buf = calloc(1, size);
  char *out = buf;
  *out++ = '"';
  for (char *s = str; *s; s++) {
    if (*s == '\\' || *s == '"') *out++ = '\\';
    *out++ = *s;
  }
  *out++ = '"';
  *out = '\0';
  return buf;
}
// Builds a string-literal token list whose contents are `str`. The
// text is quoted and tokenized as a new file that reuses the name and
// number of the file `tmpl` belongs to.
static Token *new_str_token(char *str, Token *tmpl) {
  char *quoted = quote_string(str);
  return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, quoted));
}
// Copy all tokens until the next newline, terminate them with
// an EOF token and then returns them. This function is used to
// create a new list of tokens for `#if` arguments.
//
// `*rest` is set to the first token of the next line.
static Token *copy_line(Token **rest, Token *tok) {
  Token *line = NULL;
  Token **tail = &line;
  while (!tok->at_bol) {
    *tail = copy_token(tok);
    tail = &(*tail)->next;
    tok = tok->next;
  }
  *tail = new_eof(tok);
  *rest = tok;
  return line;
}
// Builds a token list for the integer `val`. The number is formatted
// into a fresh 13-byte buffer, followed by a newline and a NUL, and
// tokenized as a new file that reuses the name and number of the file
// `tmpl` belongs to.
static Token *new_num_token(int val, Token *tmpl) {
  char *buf = malloc(13);
  // FormatInt32 hands back the position just past what it wrote.
  char *end = FormatInt32(buf, val);
  end[0] = '\n';
  end[1] = 0;
  return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, buf));
}
// Copies the rest of the current line and resolves every `defined`
// operator in it. `*rest` is set to the first token of the next line.
static Token *read_const_expr(Token **rest, Token *tok) {
  Token head = {};
  Token *tail = &head;
  tok = copy_line(rest, tok);
  for (; tok->kind != TK_EOF;) {
    if (!EQUAL(tok, "defined")) {
      tail = tail->next = tok;
      tok = tok->next;
      continue;
    }
    // "defined(foo)" or "defined foo" becomes "1" if macro "foo"
    // is defined. Otherwise "0".
    Token *defined_tok = tok;
    bool has_paren = CONSUME(&tok, tok->next, "(");
    if (tok->kind != TK_IDENT) {
      error_tok(defined_tok, "macro name must be an identifier");
    }
    bool is_defined = find_macro(tok) != NULL;
    tok = tok->next;
    if (has_paren) tok = skip(tok, ')');
    tail = tail->next = new_num_token(is_defined ? 1 : 0, defined_tok);
  }
  tail->next = tok;
  return head.next;
}
// Read and evaluate a constant expression.
//
// `tok` is the directive name; the expression starts at the token
// after it. `*rest` is set to the first token of the next line.
static long eval_const_expr(Token **rest, Token *tok) {
  Token *directive = tok;
  Token *expr = preprocess2(read_const_expr(rest, tok->next));
  if (expr->kind == TK_EOF) error_tok(directive, "no expression");
  // [https://www.sigbus.info/n1570#6.10.1p4] The standard requires
  // we replace remaining non-macro identifiers with "0" before
  // evaluating a constant expression. For example, `#if foo` is
  // equivalent to `#if 0` if foo is not defined.
  for (Token *t = expr; t->kind != TK_EOF; t = t->next) {
    if (t->kind != TK_IDENT) continue;
    // Overwrite in place but keep the token linked into the list.
    Token *saved_next = t->next;
    *t = *new_num_token(0, t);
    t->next = saved_next;
  }
  // Convert pp-numbers to regular numbers
  convert_pp_tokens(expr);
  Token *tail;
  long val = const_expr(&tail, expr);
  if (!(tail->kind == TK_EOF || tail->kind == TK_JAVADOWN)) {
    error_tok(tail, "extra token");
  }
  return val;
}
// Pushes a new entry onto the conditional-inclusion stack. The entry
// starts in the IN_THEN state and records `tok` and whether the first
// clause is being included.
static CondIncl *push_cond_incl(Token *tok, bool included) {
  CondIncl *entry = calloc(1, sizeof(*entry));
  entry->tok = tok;
  entry->included = included;
  entry->ctx = IN_THEN;
  entry->next = cond_incl;
  cond_incl = entry;
  return entry;
}
// Looks up the macro named by `tok`. Returns NULL if `tok` is not an
// identifier or no macro with that name is in the table.
static Macro *find_macro(Token *tok) {
  if (tok->kind == TK_IDENT) {
    return hashmap_get2(&macros, tok->loc, tok->len);
  }
  return NULL;
}
// Allocates a macro with the given name, kind and body, registers it
// in the macro table under `name`, and returns it.
static Macro *add_macro(char *name, bool is_objlike, Token *body) {
  Macro *macro = calloc(1, sizeof(*macro));
  macro->body = body;
  macro->is_objlike = is_objlike;
  macro->name = name;
  hashmap_put(&macros, name, macro);
  return macro;
}
// Parses the parameter list of a function-like macro definition.
// `tok` is the token after the opening parenthesis. Returns the list
// of named parameters and sets `*rest` to the token after the closing
// parenthesis. If the list ends with `...` or `name...`, the name to
// use for the variadic arguments is stored in `*va_args_name`.
static MacroParam *read_macro_params(Token **rest, Token *tok,
                                     char **va_args_name) {
  MacroParam *params = NULL;
  MacroParam **tail = &params;
  for (;;) {
    if (EQUAL(tok, ")")) {
      *rest = tok->next;
      return params;
    }
    // Every parameter after the first is preceded by a comma.
    if (tail != &params) tok = skip(tok, ',');
    if (EQUAL(tok, "...")) {
      *va_args_name = "__VA_ARGS__";
      *rest = skip(tok->next, ')');
      return params;
    }
    if (tok->kind == TK_JAVADOWN) tok = tok->next;
    if (tok->kind != TK_IDENT) error_tok(tok, "expected an identifier");
    if (EQUAL(tok->next, "...")) {
      // Named variadic parameter, e.g. `args...`.
      *va_args_name = strndup(tok->loc, tok->len);
      *rest = skip(tok->next->next, ')');
      return params;
    }
    MacroParam *param = calloc(1, sizeof(*param));
    param->name = strndup(tok->loc, tok->len);
    *tail = param;
    tail = &param->next;
    tok = tok->next;
  }
}
2020-12-26 10:09:07 +00:00
// Parses a macro definition starting at the macro name and registers
// it. A `(` that directly follows the name (no whitespace) makes the
// macro function-like. `*rest` is set to the first token of the next
// line. Returns the new macro.
static Macro *read_macro_definition(Token **rest, Token *tok) {
  if (tok->kind != TK_IDENT) error_tok(tok, "macro name must be an identifier");
  char *name = strndup(tok->loc, tok->len);
  tok = tok->next;
  bool is_funclike = !tok->has_space && tok->len == 1 && tok->loc[0] == '(';
  if (!is_funclike) {
    // Object-like macro
    return add_macro(name, true, copy_line(rest, tok));
  }
  // Function-like macro
  char *va_args_name = NULL;
  MacroParam *params = read_macro_params(&tok, tok->next, &va_args_name);
  Macro *macro = add_macro(name, false, copy_line(rest, tok));
  macro->params = params;
  macro->va_args_name = va_args_name;
  return macro;
}
// Reads one macro argument. Tokens are copied until an unnested `)`
// or, unless `read_rest` is set, an unnested `,`. The copy is
// terminated with a TK_EOF token. `*rest` is set to the delimiter
// that stopped the scan.
static MacroArg *read_macro_arg_one(Token **rest, Token *tok, bool read_rest) {
  Token *first = NULL;
  Token **tail = &first;
  int depth = 0;
  for (;; tok = tok->next) {
    // Only single-character tokens can be delimiters or parentheses.
    char c = tok->len == 1 ? tok->loc[0] : 0;
    if (depth == 0 && (c == ')' || (c == ',' && !read_rest))) break;
    if (tok->kind == TK_EOF) error_tok(tok, "premature end of input");
    if (c == '(') {
      depth++;
    } else if (c == ')') {
      depth--;
    }
    *tail = copy_token(tok);
    tail = &(*tail)->next;
  }
  *tail = new_eof(tok);
  MacroArg *arg = calloc(1, sizeof(*arg));
  arg->tok = first;
  *rest = tok;
  return arg;
}
// Reads the arguments of a function-like macro invocation. `tok` is
// the macro name, which is followed by `(`. One argument is read per
// entry of `params`; if `va_args_name` is set, the remaining tokens
// become one extra argument flagged as variadic. `*rest` is set to the
// closing parenthesis of the invocation.
static MacroArg *read_macro_args(Token **rest, Token *tok, MacroParam *params,
                                 char *va_args_name) {
  Token *macro_tok = tok;
  MacroArg *args = NULL;
  MacroArg **tail = &args;
  MacroParam *pp;
  // Step over the macro name and the opening parenthesis.
  tok = tok->next->next;
  for (pp = params; pp; pp = pp->next) {
    if (tail != &args) tok = skip(tok, ',');
    MacroArg *arg = read_macro_arg_one(&tok, tok, false);
    arg->name = pp->name;
    *tail = arg;
    tail = &arg->next;
  }
  if (va_args_name) {
    MacroArg *rest_arg;
    bool at_rparen = tok->len == 1 && tok->loc[0] == ')';
    if (at_rparen) {
      // Nothing was passed for the variadic part.
      rest_arg = calloc(1, sizeof(*rest_arg));
      rest_arg->tok = new_eof(tok);
    } else {
      if (pp != params) tok = skip(tok, ',');
      rest_arg = read_macro_arg_one(&tok, tok, true);
    }
    rest_arg->name = va_args_name;
    rest_arg->is_va_args = true;
    *tail = rest_arg;
  } else if (pp) {
    error_tok(macro_tok, "too many arguments");
  }
  // Only verify the closing parenthesis; it is not consumed.
  skip(tok, ')');
  *rest = tok;
  return args;
}
// Returns the argument whose parameter name equals the text of `tok`,
// or NULL if there is none.
static MacroArg *find_arg(MacroArg *args, Token *tok) {
  MacroArg *ap = args;
  while (ap) {
    if (tok->len == strlen(ap->name) &&
        strncmp(tok->loc, ap->name, tok->len) == 0) {
      return ap;
    }
    ap = ap->next;
  }
  return NULL;
}
// Concatenates all tokens in `tok` and returns a new string.
//
// Scanning stops at `end` or at a TK_EOF token, whichever comes first.
// A single space is emitted before every token but the first that was
// preceded by whitespace in the source.
static char *join_tokens(Token *tok, Token *end) {
  Token *t;
  // One byte for the terminating NUL plus the text of every token.
  int size = 1;
  for (t = tok; t != end && t->kind != TK_EOF; t = t->next) {
    size += t->len + ((t != tok && t->has_space) ? 1 : 0);
  }
  char *buf = calloc(1, size);
  char *out = buf;
  for (t = tok; t != end && t->kind != TK_EOF; t = t->next) {
    if (t != tok && t->has_space) *out++ = ' ';
    strncpy(out, t->loc, t->len);
    out += t->len;
  }
  *out = '\0';
  return buf;
}
// Concatenates all tokens in `arg` and returns a new string token.
// This function is used for the stringizing operator (#).
//
// The new token needs some source location for error reporting, so
// the `hash` token is used as its template.
static Token *stringize(Token *hash, Token *arg) {
  return new_str_token(join_tokens(arg, NULL), hash);
}
// Concatenate two tokens to create a new token.
//
// The texts of `lhs` and `rhs` are joined and tokenized again. It is
// an error if the result is not exactly one token.
static Token *paste(Token *lhs, Token *rhs) {
  char *text = xasprintf("%.*s%.*s", lhs->len, lhs->loc, rhs->len, rhs->loc);
  Token *joined = tokenize(new_file(lhs->file->name, lhs->file->file_no, text));
  if (joined->next->kind != TK_EOF) {
    error_tok(lhs, "pasting forms '%s', an invalid token", text);
  }
  return joined;
}
// Returns true if `args` has an argument named `__VA_ARGS__` that
// holds at least one token.
static bool has_varargs(MacroArg *args) {
  MacroArg *ap = args;
  while (ap && strcmp(ap->name, "__VA_ARGS__") != 0) ap = ap->next;
  if (!ap) return false;
  return ap->tok->kind != TK_EOF;
}
// Returns true if `tok` is the one-character token `c`.
static bool subst_is_char(Token *tok, char c) {
  return tok->len == 1 && tok->loc[0] == c;
}

// Returns true if `tok` is the token-pasting operator `##`.
static bool subst_is_paste(Token *tok) {
  return tok->len == 2 && tok->loc[0] == '#' && tok->loc[1] == '#';
}

// Appends copies of the tokens of `list` (up to its TK_EOF) after
// `cur` and returns the new last token.
static Token *subst_copy_list(Token *cur, Token *list) {
  for (Token *t = list; t->kind != TK_EOF; t = t->next) {
    cur = cur->next = copy_token(t);
  }
  return cur;
}

// Replace func-like macro parameters with given arguments.
//
// `tok` is the macro body and `args` the actual arguments. Returns a
// new token list terminated by the body's TK_EOF token.
static Token *subst(Token *tok, MacroArg *args) {
  Token head = {};
  Token *cur = &head;
  while (tok->kind != TK_EOF) {
    // "#" followed by a parameter is replaced with stringized actuals.
    if (subst_is_char(tok, '#')) {
      MacroArg *operand = find_arg(args, tok->next);
      if (!operand) {
        error_tok(tok->next, "'#' is not followed by a macro parameter");
      }
      cur = cur->next = stringize(tok, operand->tok);
      tok = tok->next->next;
      continue;
    }
    // [GNU] If __VA_ARGS__ is empty, `,##__VA_ARGS__` is expanded
    // to the empty token list. Otherwise, it's expanded to `,` and
    // __VA_ARGS__.
    if (subst_is_char(tok, ',') && subst_is_paste(tok->next)) {
      MacroArg *va = find_arg(args, tok->next->next);
      if (va && va->is_va_args) {
        if (va->tok->kind == TK_EOF) {
          // Drop the comma, the `##` and the parameter.
          tok = tok->next->next->next;
        } else {
          // Keep the comma; the parameter is handled next iteration.
          cur = cur->next = copy_token(tok);
          tok = tok->next->next;
        }
        continue;
      }
    }
    // `x ## y` where the left operand has already been emitted.
    if (subst_is_paste(tok)) {
      if (cur == &head)
        error_tok(tok, "'##' cannot appear at start of macro expansion");
      if (tok->next->kind == TK_EOF)
        error_tok(tok, "'##' cannot appear at end of macro expansion");
      MacroArg *rhs_arg = find_arg(args, tok->next);
      if (!rhs_arg) {
        *cur = *paste(cur, tok->next);
      } else if (rhs_arg->tok->kind != TK_EOF) {
        // Paste with the first token of the argument, then append
        // the remaining tokens of the argument unchanged.
        *cur = *paste(cur, rhs_arg->tok);
        cur = subst_copy_list(cur, rhs_arg->tok->next);
      }
      tok = tok->next->next;
      continue;
    }
    MacroArg *arg = find_arg(args, tok);
    // A parameter that is the left operand of `##` is substituted
    // without being macro-expanded first.
    if (arg && subst_is_paste(tok->next)) {
      Token *rhs = tok->next->next;
      if (arg->tok->kind != TK_EOF) {
        cur = subst_copy_list(cur, arg->tok);
        tok = tok->next;
        continue;
      }
      // Empty left operand: the result is just the right operand.
      MacroArg *rhs_arg = find_arg(args, rhs);
      if (rhs_arg) {
        cur = subst_copy_list(cur, rhs_arg->tok);
      } else {
        cur = cur->next = copy_token(rhs);
      }
      tok = rhs->next;
      continue;
    }
    // If __VA_ARGS__ is empty, __VA_OPT__(x) is expanded to the
    // empty token list. Otherwise, __VA_OPT__(x) is expanded to x.
    if (EQUAL(tok, "__VA_OPT__") && EQUAL(tok->next, "(")) {
      MacroArg *opt = read_macro_arg_one(&tok, tok->next->next, true);
      if (has_varargs(args)) {
        // These tokens are linked in directly, not copied.
        for (Token *t = opt->tok; t->kind != TK_EOF; t = t->next) {
          cur = cur->next = t;
        }
      }
      tok = skip(tok, ')');
      continue;
    }
    // Handle a macro token. Macro arguments are completely macro-expanded
    // before they are substituted into a macro body.
    if (arg) {
      Token *expanded = preprocess2(arg->tok);
      expanded->at_bol = tok->at_bol;
      expanded->has_space = tok->has_space;
      cur = subst_copy_list(cur, expanded);
      tok = tok->next;
      continue;
    }
    // Handle a non-macro token.
    cur = cur->next = copy_token(tok);
    tok = tok->next;
  }
  cur->next = tok;
  return head.next;
}
// If tok is a macro, expand it and return true.
// Otherwise, do nothing and return false.
//
// On success `*rest` is set to the head of the expansion, which is
// followed by the tokens that came after the macro invocation.
static bool expand_macro(Token **rest, Token *tok) {
  // A token is never expanded by a macro already in its hideset.
  if (hideset_contains(tok->hideset, tok->loc, tok->len)) return false;
  Macro *m = find_macro(tok);
  if (!m) return false;
  // Built-in dynamic macro application such as __LINE__
  if (m->handler) {
    Token *result = m->handler(tok);
    result->next = tok->next;
    *rest = result;
    return true;
  }
  // Object-like macro application
  if (m->is_objlike) {
    Hideset *hs = hideset_union(tok->hideset, new_hideset(m->name));
    Token *body = add_hideset(m->body, hs);
    for (Token *t = body; t->kind != TK_EOF; t = t->next) {
      t->origin = tok;
    }
    Token *out = append(body, tok->next);
    out->at_bol = tok->at_bol;
    out->has_space = tok->has_space;
    *rest = out;
    return true;
  }
  // If a funclike macro token is not followed by an argument list,
  // treat it as a normal identifier.
  bool followed_by_paren = tok->next->len == 1 && tok->next->loc[0] == '(';
  if (!followed_by_paren) return false;
  // Function-like macro application
  Token *macro_token = tok;
  MacroArg *args = read_macro_args(&tok, tok, m->params, m->va_args_name);
  Token *rparen = tok;
  // Tokens that make up a func-like macro invocation may have different
  // hidesets, and if that's the case, it's not clear what the hideset
  // for the new tokens should be. We take the intersection of the
  // hidesets of the macro token and the closing parenthesis and use it
  // as the new hideset, as explained in Dave Prosser's algorithm.
  Hideset *hs = hideset_intersection(macro_token->hideset, rparen->hideset);
  hs = hideset_union(hs, new_hideset(m->name));
  Token *body = add_hideset(subst(m->body, args), hs);
  for (Token *t = body; t->kind != TK_EOF; t = t->next) {
    t->origin = macro_token;
  }
  Token *out = append(body, rparen->next);
  out->at_bol = macro_token->at_bol;
  out->has_space = macro_token->has_space;
  *rest = out;
  return true;
}
char *search_include_paths(char *filename) {
if (filename[0] == '/') return filename;
static HashMap cache;
char *cached = hashmap_get(&cache, filename);
if (cached) return cached;
// Search a file from the include paths.
for (int i = 0; i < include_paths.len; i++) {
Prove that Makefile is fully defined The whole repository is now buildable with GNU Make Landlock sandboxing. This proves that no Makefile targets exist which touch files other than their declared prerequisites. In order to do this, we had to: 1. Stop code morphing GCC output in package.com and instead run a newly introduced FIXUPOBJ.COM command after GCC invocations. 2. Disable all the crumby Python unit tests that do things like create files in the current directory, or rename() files between folders. This ended up being a lot of tests, but most of them are still ok. 3. Introduce an .UNSANDBOXED variable to GNU Make to disable Landlock. We currently only do this for things like `make tags`. 4. This change deletes some GNU Make code that was preventing the execve() optimization from working. This means it should no longer be necessary in most cases for command invocations to be indirected through the cocmd interpreter. 5. Missing dependencies had to be declared in certain places, in cases where they couldn't be automatically determined by MKDEPS.COM 6. The libcxx header situation has finally been tamed. One of the things that makes this difficult is MKDEPS.COM only wants to consider the first 64kb of a file, in order to go fast. But libcxx likes to have #include lines buried after huge documentation. 7. An .UNVEIL variable has been introduced to GNU Make just in case we ever wish to explicitly specify additional things that need to be whitelisted which aren't strictly prerequisites. This works in a manner similar to the recently introduced .EXTRA_PREREQS feature. There's now a new build/bootstrap/make.com prebuilt binary available. It should no longer be possible to write invalid Makefile code.
2022-08-06 10:51:50 +00:00
char *path = xjoinpaths(include_paths.data[i], filename);
if (!fileexists(path)) continue;
2020-12-01 11:43:40 +00:00
hashmap_put(&cache, filename, path);
include_next_idx = i + 1;
return path;
}
return NULL;
}
// Continues the include path search from include_next_idx. Returns
// the first existing candidate for `filename`, or NULL if the rest of
// the path list has none. The index is left at the entry that matched.
static char *search_include_next(char *filename) {
  while (include_next_idx < include_paths.len) {
    char *candidate =
        xasprintf("%s/%s", include_paths.data[include_next_idx], filename);
    if (fileexists(candidate)) return candidate;
    include_next_idx++;
  }
  return NULL;
}
// Read an #include argument.
//
// Returns the filename as a newly allocated string, sets `*is_dquote`
// to whether the double-quoted form was used, and sets `*rest` to the
// first token of the next line.
static char *read_include_filename(Token **rest, Token *tok, bool *is_dquote) {
  // Pattern 1: #include "foo.h"
  if (tok->kind == TK_STR) {
    // A double-quoted filename for #include is a special kind of
    // token, and we don't want to interpret any escape sequences in it.
    // For example, "\f" in "C:\foo" is not a formfeed character but
    // just two non-control characters, backslash and f.
    // So we take the raw text between the quotes, not token->str.
    *is_dquote = true;
    *rest = skip_line(tok->next);
    return strndup(tok->loc + 1, tok->len - 2);
  }
  // Pattern 2: #include <foo.h>
  if (EQUAL(tok, "<")) {
    // Reconstruct a filename from a sequence of tokens between
    // "<" and ">".
    Token *open = tok;
    // Find closing ">".
    while (!EQUAL(tok, ">")) {
      if (tok->at_bol || tok->kind == TK_EOF) error_tok(tok, "expected '>'");
      tok = tok->next;
    }
    *is_dquote = false;
    *rest = skip_line(tok->next);
    return join_tokens(open->next, tok);
  }
  // Pattern 3: #include FOO
  // In this case FOO must be macro-expanded to either
  // a single string token or a sequence of "<" ... ">".
  if (tok->kind == TK_IDENT) {
    Token *expanded = preprocess2(copy_line(rest, tok));
    return read_include_filename(&expanded, expanded, is_dquote);
  }
  error_tok(tok, "expected a filename");
}
// Detect the following "include guard" pattern.
//
//   #ifndef FOO_H
//   #define FOO_H
//   ...
//   #endif
//
// Returns the guard macro name as a newly allocated string, or NULL
// if the token list does not match the pattern.
static char *detect_include_guard(Token *tok) {
  // Detect the first two lines.
  if (!(is_hash(tok) && EQUAL(tok->next, "ifndef"))) return NULL;
  tok = tok->next->next;
  if (tok->kind != TK_IDENT) return NULL;
  char *macro = strndup(tok->loc, tok->len);
  tok = tok->next;
  if (!is_hash(tok)) return NULL;
  if (!EQUAL(tok->next, "define")) return NULL;
  if (!equal(tok->next->next, macro, strlen(macro))) return NULL;
  // Read until the end of the file.
  for (; tok->kind != TK_EOF;) {
    if (is_hash(tok)) {
      if (EQUAL(tok->next, "endif") && tok->next->next->kind == TK_EOF) {
        return macro;
      }
      // NOTE(review): `tok` is the `#` token here, so these
      // comparisons against directive names look like they can never
      // match -- confirm whether `tok->next` was intended.
      if (EQUAL(tok, "if") || EQUAL(tok, "ifdef") || EQUAL(tok, "ifndef")) {
        tok = skip_cond_incl(tok->next);
        continue;
      }
    }
    tok = tok->next;
  }
  return NULL;
}
2020-12-09 21:53:02 +00:00
// Tokenizes the file at `path` and returns its tokens followed by
// `tok`. Returns `tok` alone when the file can be skipped. Reports an
// error at `filename_tok` if the file cannot be opened.
static Token *include_file(Token *tok, char *path, Token *filename_tok) {
  static HashMap include_guards;
  // Check for "#pragma once"
  if (hashmap_get(&pragma_once, path)) return tok;
  // If we read the same file before, and if the file was guarded
  // by the usual #ifndef ... #endif pattern, we may be able to
  // skip the file without opening it.
  char *known_guard = hashmap_get(&include_guards, path);
  if (known_guard && hashmap_get(&macros, known_guard)) return tok;
  Token *included = tokenize_file(path);
  if (!included) {
    error_tok(filename_tok, "%s: cannot open file: %s", path, strerror(errno));
  }
  // Remember the guard so later inclusions can take the fast path.
  char *detected = detect_include_guard(included);
  if (detected) hashmap_put(&include_guards, path, detected);
  return append(included, tok);
}
// Read #line arguments
//
// Updates the line delta of the current file, and its display name if
// a filename is given. `*rest` is set to the first token of the next
// line.
static void read_line_marker(Token **rest, Token *tok) {
  // TODO: This is broken if file is different? See gperf codegen.
  Token *directive = tok;
  Token *arg = preprocess(copy_line(rest, tok));
  if (!(arg->kind == TK_NUM && arg->ty->kind == TY_INT)) {
    error_tok(arg, "invalid line marker");
  }
  directive->file->line_delta = arg->val - directive->line_no;
  arg = arg->next;
  if (arg->kind == TK_EOF) return;
  if (arg->kind != TK_STR) error_tok(arg, "filename expected");
  directive->file->display_name = arg->str;
}
// Visit all tokens in `tok` while evaluating preprocessing
// macros and directives.
//
// Returns the resulting token list, terminated by the TK_EOF token of
// the input. Ordinary tokens are linked into the output in place (not
// copied) after their line delta and display filename are filled in
// from their file.
static Token *preprocess2(Token *tok) {
  unsigned char kw;
  Token head = {};
  Token *cur = &head;
  while (tok->kind != TK_EOF) {
    // If it is a macro, expand it.
    if (expand_macro(&tok, tok)) continue;
    // make sure javadown is removed if it's for a macro definition
    if (tok->kind == TK_JAVADOWN && is_hash(tok->next) &&
        EQUAL(tok->next->next, "define")) {
      // Remember the javadown token first: read_macro_definition
      // advances `tok`, so reading `tok` in the same expression as the
      // call would yield an unspecified value.
      Token *javadown = tok;
      Macro *m = read_macro_definition(&tok, tok->next->next->next);
      m->javadown = javadown;
      continue;
    }
    // Pass through if it is not a "#".
    if (!is_hash(tok)) {
      tok->line_delta = tok->file->line_delta;
      tok->filename = tok->file->display_name;
      cur = cur->next = tok;
      tok = tok->next;
      continue;
    }
    Token *start = tok;
    tok = tok->next;
    if ((kw = GetKw(tok->loc, tok->len))) {
      if (kw == KW_INCLUDE) {
        bool is_dquote;
        char *filename = read_include_filename(&tok, tok->next, &is_dquote);
        if (filename[0] != '/' && is_dquote) {
          // Try the directory of the including file first.
          char *tmp = strdup(start->file->name);
          char *path = xasprintf("%s/%s", dirname(tmp), filename);
          free(tmp);
          if (fileexists(path)) {
            // `path` is deliberately not freed: include_file passes
            // it to tokenize_file and uses it as a hashmap key, so
            // the pointer may be retained.
            tok = include_file(tok, path, start->next->next);
            continue;
          }
          free(path);
        }
        char *path = search_include_paths(filename);
        tok = include_file(tok, path ? path : filename, start->next->next);
        continue;
      }
      if (kw == KW_INCLUDE_NEXT) {
        bool ignore;
        char *filename = read_include_filename(&tok, tok->next, &ignore);
        char *path = search_include_next(filename);
        tok = include_file(tok, path ? path : filename, start->next->next);
        continue;
      }
      if (kw == KW_DEFINE) {
        read_macro_definition(&tok, tok->next);
        continue;
      }
      if (kw == KW_UNDEF) {
        tok = tok->next;
        if (tok->kind != TK_IDENT)
          error_tok(tok, "macro name must be an identifier");
        undef_macro(strndup(tok->loc, tok->len));
        tok = skip_line(tok->next);
        continue;
      }
      if (kw == KW_IF) {
        long val = eval_const_expr(&tok, tok);
        push_cond_incl(start, val);
        if (!val) tok = skip_cond_incl(tok);
        continue;
      }
      if (kw == KW_IFDEF) {
        bool defined = find_macro(tok->next);
        push_cond_incl(tok, defined);
        tok = skip_line(tok->next->next);
        if (!defined) tok = skip_cond_incl(tok);
        continue;
      }
      if (kw == KW_IFNDEF) {
        bool defined = find_macro(tok->next);
        push_cond_incl(tok, !defined);
        tok = skip_line(tok->next->next);
        if (defined) tok = skip_cond_incl(tok);
        continue;
      }
      if (kw == KW_ELIF) {
        if (!cond_incl || cond_incl->ctx == IN_ELSE)
          error_tok(start, "stray #elif");
        cond_incl->ctx = IN_ELIF;
        // The condition is only evaluated if no earlier clause of
        // this conditional has been taken.
        if (!cond_incl->included && eval_const_expr(&tok, tok))
          cond_incl->included = true;
        else
          tok = skip_cond_incl(tok);
        continue;
      }
      if (kw == KW_ELSE) {
        if (!cond_incl || cond_incl->ctx == IN_ELSE)
          error_tok(start, "stray #else");
        cond_incl->ctx = IN_ELSE;
        tok = skip_line(tok->next);
        if (cond_incl->included) tok = skip_cond_incl(tok);
        continue;
      }
      if (kw == KW_ENDIF) {
        if (!cond_incl) error_tok(start, "stray #endif");
        cond_incl = cond_incl->next;
        tok = skip_line(tok->next);
        continue;
      }
      if (kw == KW_LINE) {
        read_line_marker(&tok, tok->next);
        continue;
      }
    }
    // A `#` followed directly by a number is handled like `#line`.
    if (tok->kind == TK_PP_NUM) {
      read_line_marker(&tok, tok);
      continue;
    }
    if (kw == KW_PRAGMA && EQUAL(tok->next, "once")) {
      hashmap_put(&pragma_once, tok->file->name, (void *)1);
      tok = skip_line(tok->next->next);
      continue;
    }
    if (kw == KW_PRAGMA) {
      // Any other pragma is dropped together with the rest of its line.
      do {
        tok = tok->next;
      } while (!tok->at_bol);
      continue;
    }
    if (kw == KW_ERROR) {
      error_tok(tok, "error");
    }
    // `#`-only line is legal. It's called a null directive.
    if (tok->at_bol) continue;
    error_tok(tok, "invalid preprocessor directive");
  }
  cur->next = tok;
  return head.next;
}
// Defines the object-like macro `name` whose body is the result of
// tokenizing `buf` as a file named "<built-in>".
void define_macro(char *name, char *buf) {
  Token *body = tokenize(new_file("<built-in>", 1, buf));
  add_macro(name, true, body);
}
// Removes the macro called `name` from the macro table.
void undef_macro(char *name) {
hashmap_delete(&macros, name);
}
// Registers an object-like macro with no body whose expansion is
// produced by calling `fn` each time the macro is used.
static Macro *add_builtin(char *name, macro_handler_fn *fn) {
  Macro *builtin = add_macro(name, true, NULL);
  builtin->handler = fn;
  return builtin;
}
// Handler that expands to the display name of the file containing the
// outermost macro invocation that produced `tmpl`.
static Token *file_macro(Token *tmpl) {
  Token *root = tmpl;
  while (root->origin) root = root->origin;
  return new_str_token(root->file->display_name, root);
}
// Handler that expands to the line number of the outermost macro
// invocation that produced `tmpl`, adjusted by the file's line delta.
static Token *line_macro(Token *tmpl) {
  Token *root = tmpl;
  while (root->origin) root = root->origin;
  int line = root->line_no + root->file->line_delta;
  return new_num_token(line, root);
}
// __COUNTER__ is expanded to serial values starting from 0.
static Token *counter_macro(Token *tmpl) {
  static int next_value = 0;
  int value = next_value;
  next_value++;
  return new_num_token(value, tmpl);
}
// __TIMESTAMP__ is expanded to a string describing the last
// modification time of the current file. E.g.
// "Fri Jul 24 01:32:50 2020"
//
// If the file cannot be stat'ed, a placeholder made of question marks
// is produced instead.
static Token *timestamp_macro(Token *tmpl) {
  struct stat st;
  char buf[64];
  if (stat(tmpl->file->name, &st) == 0) {
    ctime_r(&st.st_mtim.tv_sec, buf);
    // Cut the string after the 24 characters of the timestamp.
    buf[24] = '\0';
    return new_str_token(buf, tmpl);
  }
  return new_str_token("??? ??? ?? ??:??:?? ????", tmpl);
}
// Handler that expands to a string literal holding `base_file`.
static Token *base_file_macro(Token *tmpl) {
return new_str_token(base_file, tmpl);
}
// __DATE__ is expanded to the current date, e.g. "May 17 2020".
//
// Returns the date as a newly formatted string that includes the
// surrounding double quotes.
static char *format_date(struct tm *tm) {
  int year = tm->tm_year + 1900;
  int day = tm->tm_mday;
  return xasprintf("\"%s %2d %d\"", kMonthNameShort[tm->tm_mon], day, year);
}
// __TIME__ is expanded to the current time, e.g. "13:34:03".
//
// Returns the time as a newly formatted string that includes the
// surrounding double quotes.
static char *format_time(struct tm *tm) {
  int hour = tm->tm_hour;
  int minute = tm->tm_min;
  int second = tm->tm_sec;
  return xasprintf("\"%02d:%02d:%02d\"", hour, minute, second);
}
// Defines the macros that depend on option flags. Each macro is
// defined as 1 when the corresponding opt_* flag is set.
void init_macros_conditional(void) {
if (opt_pg) define_macro("__PG__", "1");
if (opt_pic) define_macro("__PIC__", "1");
if (opt_sse3) define_macro("__SSE3__", "1");
if (opt_sse4) define_macro("__SSE4__", "1");
if (opt_popcnt) define_macro("__POPCNT__", "1");
if (opt_fentry) define_macro("__MFENTRY__", "1");
}
2020-12-01 11:43:40 +00:00
void init_macros(void) {
char *val, *name = "\
__chibicc__\000\
1\000\
__cosmopolitan__\000\
1\000\
__GNUC__\000\
2020-12-09 12:00:48 +00:00
9\000\
__GNUC_MINOR__\000\
2020-12-09 12:00:48 +00:00
0\000\
__GNUC_PATCHLEVEL__\000\
2020-12-09 12:00:48 +00:00
0\000\
__NO_INLINE__\000\
16\000\
2020-12-26 10:09:07 +00:00
__GNUC_STDC_INLINE__\000\
1\000\
__BIGGEST_ALIGNMENT__\000\
16\000\
__C99_MACRO_WITH_VA_ARGS\000\
1\000\
__GCC_ASM_FLAG_OUTPUTS__\000\
1\000\
__ELF__\000\
1\000\
__LP64__\000\
1\000\
_LP64\000\
1\000\
__STDC__\000\
1\000\
__STDC_HOSTED__\000\
1\000\
__STDC_NO_COMPLEX__\000\
1\000\
__STDC_UTF_16__\000\
1\000\
__STDC_UTF_32__\000\
1\000\
__STDC_VERSION__\000\
201112L\000\
__USER_LABEL_PREFIX__\000\
\000\
__alignof__\000\
_Alignof\000\
__const__\000\
const\000\
__inline__\000\
inline\000\
__signed__\000\
signed\000\
__typeof__\000\
typeof\000\
__volatile__\000\
volatile\000\
__unix\000\
1\000\
__unix__\000\
1\000\
__gnu_linux__\000\
1\000\
__BYTE_ORDER__\000\
1234\000\
__FLOAT_WORD_ORDER__\000\
1234\000\
__ORDER_BIG_ENDIAN__\000\
4321\000\
__ORDER_LITTLE_ENDIAN__\000\
1234\000\
__INT8_MAX__\000\
0x7f\000\
__UINT8_MAX__\000\
0xff\000\
__INT16_MAX__\000\
0x7fff\000\
__UINT16_MAX__\000\
0xffff\000\
__SHRT_MAX__\000\
0x7fff\000\
__INT_MAX__\000\
0x7fffffff\000\
__INT32_MAX__\000\
0x7fffffff\000\
__UINT32_MAX__\000\
0xffffffffu\000\
__INT64_MAX__\000\
0x7fffffffffffffffl\000\
__INTMAX_MAX__\000\
0x7fffffffffffffffl\000\
2020-12-26 10:09:07 +00:00
__LONG_MAX__\000\
0x7fffffffffffffffl\000\
__LONG_LONG_MAX__\000\
0x7fffffffffffffffl\000\
__UINT64_MAX__\000\
0xfffffffffffffffful\000\
__UINTMAX_MAX__\000\
0xfffffffffffffffful\000\
__SIZE_MAX__\000\
0xfffffffffffffffful\000\
__INTPTR_MAX__\000\
0x7fffffffffffffffl\000\
__UINTPTR_MAX__\000\
0xfffffffffffffffful\000\
__WINT_MAX__\000\
0xffffffffu\000\
__CHAR_BIT__\000\
8\000\
__SIZEOF_SHORT__\000\
2\000\
__SIZEOF_INT__\000\
4\000\
__SIZEOF_LONG__\000\
8\000\
__SIZEOF_LONG_LONG__\000\
8\000\
__SIZEOF_POINTER__\000\
8\000\
__SIZEOF_PTRDIFF_T__\000\
8\000\
__SIZEOF_SIZE_T__\000\
8\000\
__SIZEOF_WCHAR_T__\000\
4\000\
__SIZEOF_WINT_T__\000\
4\000\
__SIZEOF_FLOAT__\000\
4\000\
__SIZEOF_FLOAT128__\000\
16\000\
__SIZEOF_DOUBLE__\000\
8\000\
__SIZEOF_FLOAT80__\000\
16\000\
__SIZEOF_LONG_DOUBLE__\000\
16\000\
__INT8_TYPE__\000\
signed char\000\
__UINT8_TYPE__\000\
unsigned char\000\
__INT16_TYPE__\000\
short int\000\
__UINT16_TYPE__\000\
short unsigned int\000\
__INT32_TYPE__\000\
int\000\
__UINT32_TYPE__\000\
unsigned int\000\
__INT64_TYPE__\000\
long int\000\
__INTMAX_TYPE__\000\
long int\000\
__UINT64_TYPE__\000\
long unsigned int\000\
__UINTMAX_TYPE__\000\
long unsigned int\000\
__INTMAX_TYPE__\000\
long int\000\
__UINTMAX_TYPE__\000\
long unsigned int\000\
__INTPTR_TYPE__\000\
long int\000\
__UINTPTR_TYPE__\000\
long unsigned int\000\
__PTRDIFF_TYPE__\000\
long int\000\
__SIZE_TYPE__\000\
long unsigned int\000\
__WCHAR_TYPE__\000\
int\000\
__CHAR16_TYPE__\000\
short unsigned int\000\
__CHAR32_TYPE__\000\
unsigned int\000\
__WINT_TYPE__\000\
unsigned int\000\
__CHAR16_TYPE__\000\
short unsigned int\000\
__WCHAR_TYPE__\000\
int\000\
__CHAR32_TYPE__\000\
unsigned int\000\
__INT_LEAST8_TYPE__\000\
signed char\000\
__UINT_LEAST8_TYPE__\000\
unsigned char\000\
__INT_LEAST16_TYPE__\000\
int\000\
__UINT_LEAST16_TYPE__\000\
unsigned short\000\
__INT_LEAST32_TYPE__\000\
short\000\
__UINT_LEAST32_TYPE__\000\
unsigned int\000\
__INT_LEAST64_TYPE__\000\
long\000\
__UINT_LEAST64_TYPE__\000\
unsigned long\000\
__INT_FAST8_TYPE__\000\
signed char\000\
__UINT_FAST8_TYPE__\000\
unsigned char\000\
__INT_FAST16_TYPE__\000\
int\000\
__UINT_FAST16_TYPE__\000\
unsigned\000\
__INT_FAST32_TYPE__\000\
int\000\
__UINT_FAST32_TYPE__\000\
unsigned\000\
2022-05-17 14:40:00 +00:00
__INT_FAST8_MAX__\000\
0x7f\000\
__INT_FAST16_MAX__\000\
0x7fffffff\000\
__INT_FAST32_MAX__\000\
0x7fffffff\000\
__INT_FAST64_MAX__\000\
0x7fffffffffffffffl\000\
__INT_FAST64_TYPE__\000\
long\000\
__UINT_FAST64_TYPE__\000\
unsigned long\000\
__DBL_DECIMAL_DIG__\000\
17\000\
__DBL_DENORM_MIN__\000\
((double)4.94065645841246544176568792868221372e-324L)\000\
__DBL_DIG__\000\
15\000\
__DBL_EPSILON__\000\
((double)2.22044604925031308084726333618164062e-16L)\000\
__DBL_HAS_DENORM__\000\
1\000\
__DBL_HAS_INFINITY__\000\
1\000\
__DBL_HAS_QUIET_NAN__\000\
1\000\
__DBL_MANT_DIG__\000\
53\000\
__DBL_MAX_10_EXP__\000\
308\000\
__DBL_MAX_EXP__\000\
1024\000\
__DBL_MAX__\000\
((double)1.79769313486231570814527423731704357e+308L)\000\
__DBL_MIN_10_EXP__\000\
(-307)\000\
__DBL_MIN_EXP__\000\
(-1021)\000\
__DBL_MIN__\000\
((double)2.22507385850720138309023271733240406e-308L)\000\
__FLT_DECIMAL_DIG__\000\
9\000\
__FLT_DENORM_MIN__\000\
1.40129846432481707092372958328991613e-45F\000\
__FLT_DIG__\000\
6\000\
__FLT_EPSILON__\000\
1.19209289550781250000000000000000000e-7F\000\
__FLT_EVAL_METHOD_TS_18661_3__\000\
0\000\
__FLT_EVAL_METHOD__\000\
0\000\
__FLT_HAS_DENORM__\000\
1\000\
__FLT_HAS_INFINITY__\000\
1\000\
__FLT_HAS_QUIET_NAN__\000\
1\000\
__FLT_MANT_DIG__\000\
24\000\
__FLT_MAX_10_EXP__\000\
38\000\
__FLT_MAX_EXP__\000\
128\000\
__FLT_MAX__\000\
3.40282346638528859811704183484516925e+38F\000\
__FLT_MIN_10_EXP__\000\
(-37)\000\
__FLT_MIN_EXP__\000\
(-125)\000\
__FLT_MIN__\000\
1.17549435082228750796873653722224568e-38F\000\
__FLT_RADIX__\000\
2\000\
__LDBL_DECIMAL_DIG__\000\
21\000\
__LDBL_DENORM_MIN__\000\
3.64519953188247460252840593361941982e-4951L\000\
__LDBL_DIG__\000\
18\000\
__LDBL_EPSILON__\000\
1.08420217248550443400745280086994171e-19L\000\
__LDBL_HAS_DENORM__\000\
1\000\
__LDBL_HAS_INFINITY__\000\
1\000\
__LDBL_HAS_QUIET_NAN__\000\
1\000\
__LDBL_MANT_DIG__\000\
64\000\
__LDBL_MAX_10_EXP__\000\
4932\000\
__LDBL_MAX_EXP__\000\
16384\000\
__LDBL_MAX__\000\
1.18973149535723176502126385303097021e+4932L\000\
__LDBL_MIN_10_EXP__\000\
(-4931)\000\
__LDBL_MIN_EXP__\000\
(-16381)\000\
__LDBL_MIN__\000\
3.36210314311209350626267781732175260e-4932L\000\
__x86_64\000\
1\000\
__x86_64__\000\
1\000\
__amd64\000\
1\000\
__amd64__\000\
1\000\
__MMX__\000\
1\000\
__SSE__\000\
1\000\
__SSE_MATH__\000\
1\000\
__SSE2__\000\
1\000\
__SSE2_MATH__\000\
1\000\
__ATOMIC_ACQUIRE\000\
2\000\
__ATOMIC_HLE_RELEASE\000\
131072\000\
__ATOMIC_HLE_ACQUIRE\000\
65536\000\
__ATOMIC_RELAXED\000\
0\000\
__ATOMIC_CONSUME\000\
1\000\
__ATOMIC_SEQ_CST\000\
5\000\
__ATOMIC_ACQ_REL\000\
4\000\
__ATOMIC_RELEASE\000\
3\000\
\000";
do {
val = name + strlen(name) + 1;
define_macro(name, val);
name = val + strlen(val) + 1;
} while (*name);
2020-12-01 11:43:40 +00:00
add_builtin("__FILE__", file_macro);
add_builtin("__LINE__", line_macro);
add_builtin("__COUNTER__", counter_macro);
add_builtin("__TIMESTAMP__", timestamp_macro);
add_builtin("__BASE_FILE__", base_file_macro);
time_t now = time(NULL);
struct tm *tm = localtime(&now);
define_macro("__DATE__", format_date(tm));
define_macro("__TIME__", format_time(tm));
}
// Classifies a string-literal token by the encoding prefix found at
// the start of its source text (`tok->loc`):
//
//   "..."    -> STR_NONE
//   u8"..."  -> STR_UTF8
//   u"..."   -> STR_UTF16
//   U"..."   -> STR_UTF32
//   L"..."   -> STR_WIDE
//
// Only the first one or two characters are inspected. `tok->loc` is
// not a standalone NUL-terminated copy of the token (the code that
// re-tokenizes literals passes only `loc`), so comparing the whole
// string against "u8" would not recognise the prefix.
static StringKind getStringKind(Token *tok) {
  switch (tok->loc[0]) {
    case '"':
      return STR_NONE;
    case 'u':
      // A 'u' is followed either by '8' (UTF-8) or by the opening quote.
      return tok->loc[1] == '8' ? STR_UTF8 : STR_UTF16;
    case 'U':
      return STR_UTF32;
    case 'L':
      return STR_WIDE;
  }
  UNREACHABLE();
}
// Concatenate adjacent string literals into a single string literal
// as per the C spec.
//
// `tok` is the head of a token list ending in a TK_EOF token. The list
// is modified in place: the first token of every run of adjacent
// string literals is replaced by one holding the joined contents, and
// its `next` is pointed past the run.
static void join_adjacent_string_literals(Token *tok) {
  // First pass: If regular string literals are adjacent to wide
  // string literals, regular string literals are converted to a wide
  // type before concatenation. In this pass, we do the conversion.
  for (Token *tok1 = tok; tok1->kind != TK_EOF;) {
    if (tok1->kind != TK_STR || tok1->next->kind != TK_STR) {
      tok1 = tok1->next;
      continue;
    }
    // Determine the common kind/element type of the run; the first
    // prefixed literal decides, and a second, different prefix is an
    // error.
    StringKind kind = getStringKind(tok1);
    Type *basety = tok1->ty->base;
    for (Token *t = tok1->next; t->kind == TK_STR; t = t->next) {
      StringKind k = getStringKind(t);
      if (kind == STR_NONE) {
        kind = k;
        basety = t->ty->base;
      } else if (k != STR_NONE && kind != k) {
        error_tok(t,
                  "unsupported non-standard concatenation of string literals");
      }
    }
    // Re-tokenize every single-byte literal of the run with the wider
    // element type.
    if (basety->size > 1) {
      for (Token *t = tok1; t->kind == TK_STR; t = t->next) {
        if (t->ty->base->size == 1) {
          *t = *tokenize_string_literal(t, basety);
        }
      }
    }
    while (tok1->kind == TK_STR) tok1 = tok1->next;
  }
  // Second pass: concatenate adjacent string literals.
  for (Token *tok1 = tok; tok1->kind != TK_EOF;) {
    if (tok1->kind != TK_STR || tok1->next->kind != TK_STR) {
      tok1 = tok1->next;
      continue;
    }
    // tok2 is the first token after the run of string literals.
    Token *tok2 = tok1->next;
    while (tok2->kind == TK_STR) tok2 = tok2->next;
    // Total element count: every literal after the first contributes
    // its length minus one, because its predecessor's terminator is
    // overwritten.
    int len = tok1->ty->array_len;
    for (Token *t = tok1->next; t != tok2; t = t->next) {
      len = len + t->ty->array_len - 1;
    }
    char *buf = calloc(len, tok1->ty->base->size);
    // `i` is the byte offset at which the next literal is copied; it
    // advances by the literal's size minus one element so that the
    // next copy starts on top of the previous terminator.
    int i = 0;
    for (Token *t = tok1; t != tok2; t = t->next) {
      memcpy(buf + i, t->str, t->ty->size);
      i = i + t->ty->size - t->ty->base->size;
    }
    *tok1 = *copy_token(tok1);
    tok1->ty = array_of(tok1->ty->base, len);
    tok1->str = buf;
    tok1->next = tok2;
    tok1 = tok2;
  }
}
// Entry point of the preprocessor.
//
// Runs preprocess2() over the token list, reports an error if a
// conditional directive is still open afterwards, converts the
// preprocessing tokens, joins adjacent string literals, and finally
// adds each token's line_delta to its line_no. Returns the head of
// the resulting list.
Token *preprocess(Token *tok) {
  Token *head = preprocess2(tok);
  if (cond_incl) {
    error_tok(cond_incl->tok, "unterminated conditional directive");
  }
  convert_pp_tokens(head);
  join_adjacent_string_literals(head);
  Token *cur = head;
  while (cur) {
    cur->line_no += cur->line_delta;
    cur = cur->next;
  }
  return head;
}