mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-29 05:50:27 +00:00
Make improvements
This commit is contained in:
parent
3e4fd4b0ad
commit
e44a0cf6f8
256 changed files with 23100 additions and 2294 deletions
21
third_party/chibicc/LICENSE
vendored
Normal file
21
third_party/chibicc/LICENSE
vendored
Normal file
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2019 Rui Ueyama
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
720
third_party/chibicc/chibicc.c
vendored
Normal file
720
third_party/chibicc/chibicc.c
vendored
Normal file
|
@ -0,0 +1,720 @@
|
|||
#include "third_party/chibicc/chibicc.h"
|
||||
|
||||
// Kind of an input file. It is either forced with -x (see parse_opt_x)
// or deduced from the file name extension (see get_file_type).
typedef enum {
  FILE_NONE = 0,  // no kind forced; deduce it from the extension
  FILE_C,         // ".c"  / -x c
  FILE_ASM,       // ".s"  / -x assembler
  FILE_OBJ,       // ".o"
  FILE_AR,        // ".a"
  FILE_DSO,       // ".so"
} FileType;
|
||||
|
||||
StringArray include_paths;
|
||||
bool opt_fcommon = true;
|
||||
bool opt_fpic;
|
||||
|
||||
static FileType opt_x;
|
||||
static StringArray opt_include;
|
||||
static bool opt_E;
|
||||
static bool opt_M;
|
||||
static bool opt_MD;
|
||||
static bool opt_MMD;
|
||||
static bool opt_MP;
|
||||
static bool opt_S;
|
||||
static bool opt_c;
|
||||
static bool opt_cc1;
|
||||
static bool opt_hash_hash_hash;
|
||||
static bool opt_static;
|
||||
static bool opt_shared;
|
||||
static char *opt_MF;
|
||||
static char *opt_MT;
|
||||
static char *opt_o;
|
||||
|
||||
static StringArray ld_extra_args;
|
||||
static StringArray std_include_paths;
|
||||
|
||||
char *base_file;
|
||||
static char *output_file;
|
||||
|
||||
static StringArray input_paths;
|
||||
|
||||
static char **tmpfiles;
|
||||
|
||||
// Writes a one-line synopsis to stderr and terminates the process with
// the given exit status.
static void usage(int status) {
  fputs("chibicc [ -o <path> ] <file>\n", stderr);
  exit(status);
}
|
||||
|
||||
// Returns true if `arg` is an option whose value is passed as the next,
// separate command line word.
//
// parse_args() uses this to make sure that such an option is never the
// last word on the command line, so the list has to name every option
// that parse_args() reads with argv[++i]. An option missing from the
// list would let parse_args() hand a null pointer to the option handler
// (e.g. a trailing "-D" ends up in strchr(NULL, '=')).
static bool take_arg(char *arg) {
  static char *const opts[] = {
      "-o",  "-I",  "-idirafter", "-include",   "-x",
      "-MF", "-MT", "-MQ",        "-Xlinker",   "-D",
      "-U",  "-L",  "-cc1-input", "-cc1-output",
  };

  for (size_t i = 0; i < sizeof(opts) / sizeof(*opts); i++)
    if (!strcmp(arg, opts[i])) return true;
  return false;
}
|
||||
|
||||
static void add_default_include_paths(char *argv0) {
|
||||
// We expect that chibicc-specific include files are installed
|
||||
// to ./include relative to argv[0].
|
||||
char *buf = calloc(1, strlen(argv0) + 10);
|
||||
sprintf(buf, "%s/include", dirname(strdup(argv0)));
|
||||
strarray_push(&include_paths, buf);
|
||||
|
||||
// Add standard include paths.
|
||||
strarray_push(&include_paths, ".");
|
||||
|
||||
// Keep a copy of the standard include paths for -MMD option.
|
||||
for (int i = 0; i < include_paths.len; i++)
|
||||
strarray_push(&std_include_paths, include_paths.data[i]);
|
||||
}
|
||||
|
||||
// Handles the value of a -D option. "NAME=BODY" defines NAME as BODY;
// a bare "NAME" defines it as 1.
static void define(char *str) {
  char *eq = strchr(str, '=');
  if (!eq) {
    define_macro(str, "1");
    return;
  }
  // The name is everything before '='; the body is everything after.
  define_macro(strndup(str, eq - str), eq + 1);
}
|
||||
|
||||
static FileType parse_opt_x(char *s) {
|
||||
if (!strcmp(s, "c")) return FILE_C;
|
||||
if (!strcmp(s, "assembler")) return FILE_ASM;
|
||||
if (!strcmp(s, "none")) return FILE_NONE;
|
||||
error("<command line>: unknown argument for -x: %s", s);
|
||||
}
|
||||
|
||||
// Returns a newly allocated copy of `s` with characters that are special
// in a makefile escaped:
//   '$'            becomes "$$"
//   '#'            becomes "\#"
//   ' ' and '\t'   get a backslash in front, and every backslash that
//                  immediately precedes them is emitted a second time
static char *quote_makefile(char *s) {
  // No input byte ever produces more than two output bytes.
  char *buf = calloc(1, strlen(s) * 2 + 1);
  char *out = buf;

  for (int i = 0; s[i]; i++) {
    char c = s[i];
    if (c == '$') {
      *out++ = '$';
      *out++ = '$';
    } else if (c == '#') {
      *out++ = '\\';
      *out++ = '#';
    } else if (c == ' ' || c == '\t') {
      // The run of backslashes right before the blank was already
      // copied once; emit one more backslash per member of that run.
      int k = i;
      while (k > 0 && s[k - 1] == '\\') {
        *out++ = '\\';
        k--;
      }
      *out++ = '\\';
      *out++ = c;
    } else {
      *out++ = c;
    }
  }
  return buf;
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
// Make sure that all command line options that take an argument
|
||||
// have an argument.
|
||||
for (int i = 1; i < argc; i++)
|
||||
if (take_arg(argv[i]))
|
||||
if (!argv[++i]) usage(1);
|
||||
|
||||
StringArray idirafter = {};
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (!strcmp(argv[i], "-###")) {
|
||||
opt_hash_hash_hash = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-cc1")) {
|
||||
opt_cc1 = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "--help")) usage(0);
|
||||
|
||||
if (!strcmp(argv[i], "-o")) {
|
||||
opt_o = argv[++i];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strncmp(argv[i], "-o", 2)) {
|
||||
opt_o = argv[i] + 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-S")) {
|
||||
opt_S = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-fcommon")) {
|
||||
opt_fcommon = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-fno-common")) {
|
||||
opt_fcommon = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-c")) {
|
||||
opt_c = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-E")) {
|
||||
opt_E = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strncmp(argv[i], "-I", 2)) {
|
||||
strarray_push(&include_paths, argv[i] + 2);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-D")) {
|
||||
define(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strncmp(argv[i], "-D", 2)) {
|
||||
define(argv[i] + 2);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-U")) {
|
||||
undef_macro(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strncmp(argv[i], "-U", 2)) {
|
||||
undef_macro(argv[i] + 2);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-include")) {
|
||||
strarray_push(&opt_include, argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-x")) {
|
||||
opt_x = parse_opt_x(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strncmp(argv[i], "-x", 2)) {
|
||||
opt_x = parse_opt_x(argv[i] + 2);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strncmp(argv[i], "-l", 2) || !strncmp(argv[i], "-Wl,", 4)) {
|
||||
strarray_push(&input_paths, argv[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-Xlinker")) {
|
||||
strarray_push(&ld_extra_args, argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-s")) {
|
||||
strarray_push(&ld_extra_args, "-s");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-M")) {
|
||||
opt_M = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-MF")) {
|
||||
opt_MF = argv[++i];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-MP")) {
|
||||
opt_MP = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-MT")) {
|
||||
if (opt_MT == NULL)
|
||||
opt_MT = argv[++i];
|
||||
else
|
||||
opt_MT = format("%s %s", opt_MT, argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-MD")) {
|
||||
opt_MD = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-MQ")) {
|
||||
if (opt_MT == NULL)
|
||||
opt_MT = quote_makefile(argv[++i]);
|
||||
else
|
||||
opt_MT = format("%s %s", opt_MT, quote_makefile(argv[++i]));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-MMD")) {
|
||||
opt_MD = opt_MMD = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-fpic") || !strcmp(argv[i], "-fPIC")) {
|
||||
opt_fpic = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-cc1-input")) {
|
||||
base_file = argv[++i];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-cc1-output")) {
|
||||
output_file = argv[++i];
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-idirafter")) {
|
||||
strarray_push(&idirafter, argv[i++]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-static")) {
|
||||
opt_static = true;
|
||||
strarray_push(&ld_extra_args, "-static");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-shared")) {
|
||||
opt_shared = true;
|
||||
strarray_push(&ld_extra_args, "-shared");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-L")) {
|
||||
strarray_push(&ld_extra_args, "-L");
|
||||
strarray_push(&ld_extra_args, argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strncmp(argv[i], "-L", 2)) {
|
||||
strarray_push(&ld_extra_args, "-L");
|
||||
strarray_push(&ld_extra_args, argv[i] + 2);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strcmp(argv[i], "-hashmap-test")) {
|
||||
hashmap_test();
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// These options are ignored for now.
|
||||
if (!strncmp(argv[i], "-O", 2) || !strncmp(argv[i], "-W", 2) ||
|
||||
!strncmp(argv[i], "-g", 2) || !strncmp(argv[i], "-std=", 5) ||
|
||||
!strcmp(argv[i], "-ffreestanding") ||
|
||||
!strcmp(argv[i], "-fno-builtin") ||
|
||||
!strcmp(argv[i], "-fno-omit-frame-pointer") ||
|
||||
!strcmp(argv[i], "-fno-stack-protector") ||
|
||||
!strcmp(argv[i], "-fno-strict-aliasing") || !strcmp(argv[i], "-m64") ||
|
||||
!strcmp(argv[i], "-mno-red-zone") || !strcmp(argv[i], "-w"))
|
||||
continue;
|
||||
|
||||
if (argv[i][0] == '-' && argv[i][1] != '\0')
|
||||
error("unknown argument: %s", argv[i]);
|
||||
|
||||
strarray_push(&input_paths, argv[i]);
|
||||
}
|
||||
|
||||
for (int i = 0; i < idirafter.len; i++)
|
||||
strarray_push(&include_paths, idirafter.data[i]);
|
||||
|
||||
if (input_paths.len == 0) error("no input files");
|
||||
|
||||
// -E implies that the input is the C macro language.
|
||||
if (opt_E) opt_x = FILE_C;
|
||||
}
|
||||
|
||||
// Opens `path` for writing and returns the stream. A null path or "-"
// selects stdout. Reports an error (and does not return) if the file
// cannot be opened.
static FILE *open_file(char *path) {
  bool use_stdout = !path || strcmp(path, "-") == 0;
  if (use_stdout) return stdout;

  FILE *fp = fopen(path, "w");
  if (fp == NULL)
    error("cannot open output file: %s: %s", path, strerror(errno));
  return fp;
}
|
||||
|
||||
// Returns true if the string `p` ends with the string `q`.
static bool ends_with(char *p, char *q) {
  int plen = strlen(p);
  int qlen = strlen(q);
  if (plen < qlen) return false;
  // Compare the last qlen characters of p against q.
  return strcmp(p + (plen - qlen), q) == 0;
}
|
||||
|
||||
// Replace file extension.
//
// Returns a newly allocated string made of the last path component of
// `tmpl`, cut at its last '.', followed by `extn`; e.g. ("dir/foo.c",
// ".o") yields "foo.o". A name without a '.' just gets `extn` appended.
// The caller owns the result.
static char *replace_extn(char *tmpl, char *extn) {
  // basename() may modify its argument, so work on a private copy. It
  // is made the same way create_tmpfile() copies its template.
  size_t size = strlen(tmpl) + 1;
  char *copy = calloc(1, size);
  memcpy(copy, tmpl, size);

  char *filename = basename(copy);
  char *dot = strrchr(filename, '.');
  if (dot) *dot = '\0';

  char *buf = calloc(1, strlen(filename) + strlen(extn) + 1);
  sprintf(buf, "%s%s", filename, extn);

  // `filename` may point into `copy`; it is not used past this point,
  // so the copy can be released instead of being leaked on every call.
  free(copy);
  return buf;
}
|
||||
|
||||
static void cleanup(void) {
|
||||
if (tmpfiles)
|
||||
for (int i = 0; tmpfiles[i]; i++) unlink(tmpfiles[i]);
|
||||
}
|
||||
|
||||
static char *create_tmpfile(void) {
|
||||
char tmpl[] = "/tmp/chibicc-XXXXXX";
|
||||
char *path = calloc(1, sizeof(tmpl));
|
||||
memcpy(path, tmpl, sizeof(tmpl));
|
||||
|
||||
int fd = mkstemp(path);
|
||||
if (fd == -1) error("mkstemp failed: %s", strerror(errno));
|
||||
close(fd);
|
||||
|
||||
static int len = 2;
|
||||
tmpfiles = realloc(tmpfiles, sizeof(char *) * len);
|
||||
tmpfiles[len - 2] = path;
|
||||
tmpfiles[len - 1] = NULL;
|
||||
len++;
|
||||
|
||||
return path;
|
||||
}
|
||||
|
||||
static void run_subprocess(char **argv) {
|
||||
// If -### is given, dump the subprocess's command line.
|
||||
if (opt_hash_hash_hash) {
|
||||
fprintf(stderr, "%s", argv[0]);
|
||||
for (int i = 1; argv[i]; i++) fprintf(stderr, " %s", argv[i]);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
if (fork() == 0) {
|
||||
// Child process. Run a new command.
|
||||
execvp(argv[0], argv);
|
||||
fprintf(stderr, "exec failed: %s: %s\n", argv[0], strerror(errno));
|
||||
_exit(1);
|
||||
}
|
||||
|
||||
// Wait for the child process to finish.
|
||||
int status;
|
||||
while (wait(&status) > 0)
|
||||
;
|
||||
if (status != 0) exit(1);
|
||||
}
|
||||
|
||||
// Re-runs this program as a subprocess with the original command line
// plus "-cc1", and, when given, "-cc1-input <input>" and
// "-cc1-output <output>".
static void run_cc1(int argc, char **argv, char *input, char *output) {
  // calloc zero-fills, so the vector stays NULL-terminated; the ten
  // spare slots cover the at most five words added below.
  char **args = calloc(argc + 10, sizeof(char *));
  memcpy(args, argv, argc * sizeof(char *));

  int n = argc;
  args[n++] = "-cc1";

  if (input) {
    args[n++] = "-cc1-input";
    args[n++] = input;
  }

  if (output) {
    args[n++] = "-cc1-output";
    args[n++] = output;
  }

  run_subprocess(args);
}
|
||||
|
||||
// Print tokens to stdout. Used for -E.
|
||||
static void print_tokens(Token *tok) {
|
||||
FILE *out = open_file(opt_o ? opt_o : "-");
|
||||
|
||||
int line = 1;
|
||||
for (; tok->kind != TK_EOF; tok = tok->next) {
|
||||
if (line > 1 && tok->at_bol) fprintf(out, "\n");
|
||||
if (tok->has_space && !tok->at_bol) fprintf(out, " ");
|
||||
fprintf(out, "%.*s", tok->len, tok->loc);
|
||||
line++;
|
||||
}
|
||||
fprintf(out, "\n");
|
||||
}
|
||||
|
||||
static bool in_std_include_path(char *path) {
|
||||
for (int i = 0; i < std_include_paths.len; i++) {
|
||||
char *dir = std_include_paths.data[i];
|
||||
int len = strlen(dir);
|
||||
if (strncmp(dir, path, len) == 0 && path[len] == '/') return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// If -M options is given, the compiler write a list of input files to
|
||||
// stdout in a format that "make" command can read. This feature is
|
||||
// used to automate file dependency management.
|
||||
static void print_dependencies(void) {
|
||||
char *path;
|
||||
if (opt_MF)
|
||||
path = opt_MF;
|
||||
else if (opt_MD)
|
||||
path = replace_extn(opt_o ? opt_o : base_file, ".d");
|
||||
else if (opt_o)
|
||||
path = opt_o;
|
||||
else
|
||||
path = "-";
|
||||
|
||||
FILE *out = open_file(path);
|
||||
if (opt_MT)
|
||||
fprintf(out, "%s:", opt_MT);
|
||||
else
|
||||
fprintf(out, "%s:", quote_makefile(replace_extn(base_file, ".o")));
|
||||
|
||||
File **files = get_input_files();
|
||||
|
||||
for (int i = 0; files[i]; i++) {
|
||||
if (opt_MMD && in_std_include_path(files[i]->name)) continue;
|
||||
fprintf(out, " \\\n %s", files[i]->name);
|
||||
}
|
||||
|
||||
fprintf(out, "\n\n");
|
||||
|
||||
if (opt_MP) {
|
||||
for (int i = 1; files[i]; i++) {
|
||||
if (opt_MMD && in_std_include_path(files[i]->name)) continue;
|
||||
fprintf(out, "%s:\n\n", quote_makefile(files[i]->name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static Token *must_tokenize_file(char *path) {
|
||||
Token *tok = tokenize_file(path);
|
||||
if (!tok) error("%s: %s", path, strerror(errno));
|
||||
return tok;
|
||||
}
|
||||
|
||||
// Concatenates two token lists. The TK_EOF token that ends tok1 is
// dropped from the chain and tok2 is linked in its place. Returns tok2
// if tok1 is null or holds nothing but TK_EOF, and tok1 otherwise.
static Token *append_tokens(Token *tok1, Token *tok2) {
  if (!tok1 || tok1->kind == TK_EOF) return tok2;

  // Find the last token in front of tok1's TK_EOF.
  Token *last = tok1;
  for (; last->next->kind != TK_EOF; last = last->next) {
  }
  last->next = tok2;
  return tok1;
}
|
||||
|
||||
static void cc1(void) {
|
||||
Token *tok = NULL;
|
||||
|
||||
// Process -include option
|
||||
for (int i = 0; i < opt_include.len; i++) {
|
||||
char *incl = opt_include.data[i];
|
||||
|
||||
char *path;
|
||||
if (file_exists(incl)) {
|
||||
path = incl;
|
||||
} else {
|
||||
path = search_include_paths(incl);
|
||||
if (!path) error("-include: %s: %s", incl, strerror(errno));
|
||||
}
|
||||
|
||||
Token *tok2 = must_tokenize_file(path);
|
||||
tok = append_tokens(tok, tok2);
|
||||
}
|
||||
|
||||
// Tokenize and parse.
|
||||
Token *tok2 = must_tokenize_file(base_file);
|
||||
tok = append_tokens(tok, tok2);
|
||||
tok = preprocess(tok);
|
||||
|
||||
// If -M or -MD are given, print file dependencies.
|
||||
if (opt_M || opt_MD) {
|
||||
print_dependencies();
|
||||
if (opt_M) return;
|
||||
}
|
||||
|
||||
// If -E is given, print out preprocessed C code as a result.
|
||||
if (opt_E) {
|
||||
print_tokens(tok);
|
||||
return;
|
||||
}
|
||||
|
||||
Obj *prog = parse(tok);
|
||||
|
||||
// Traverse the AST to emit assembly.
|
||||
FILE *out = open_file(output_file);
|
||||
codegen(prog, out);
|
||||
fclose(out);
|
||||
}
|
||||
|
||||
// Runs "as" on `input`, writing the result to `output`.
static void assemble(char *input, char *output) {
  char *as_argv[] = {"as", "-W", "-I.", "-c", input, "-o", output, NULL};
  run_subprocess(as_argv);
}
|
||||
|
||||
// Builds an "ld" command line that links `inputs` into `output` and
// runs it. The startup files and library directories are fixed paths
// that are compiled in.
static void run_linker(StringArray *inputs, char *output) {
  // Library search directories, in the order they are handed to ld.
  static char *const lib_dirs[] = {
      "-L/usr/lib/gcc/x86_64-linux-gnu/9",
      "-L/usr/lib/x86_64-linux-gnu",
      "-L/usr/lib64",
      "-L/lib/x86_64-linux-gnu",
      "-L/lib64",
      "-L/usr/lib/x86_64-linux-gnu",
      "-L/usr/lib",
      "-L/lib",
  };
  int num_lib_dirs = sizeof(lib_dirs) / sizeof(*lib_dirs);

  StringArray arr = {};

  strarray_push(&arr, "ld");
  strarray_push(&arr, "-o");
  strarray_push(&arr, output);
  strarray_push(&arr, "-m");
  strarray_push(&arr, "elf_x86_64");

  // Startup files. crt1.o is left out when building a shared object.
  if (!opt_shared) strarray_push(&arr, "/usr/lib/x86_64-linux-gnu/crt1.o");
  strarray_push(&arr, "/usr/lib/x86_64-linux-gnu/crti.o");
  strarray_push(&arr, opt_shared
                          ? "/usr/lib/gcc/x86_64-linux-gnu/9/crtbeginS.o"
                          : "/usr/lib/gcc/x86_64-linux-gnu/9/crtbegin.o");

  for (int i = 0; i < num_lib_dirs; i++) strarray_push(&arr, lib_dirs[i]);

  if (!opt_static) {
    strarray_push(&arr, "-dynamic-linker");
    strarray_push(&arr, "/lib64/ld-linux-x86-64.so.2");
  }

  // Options collected by parse_args() come before the inputs.
  for (int i = 0; i < ld_extra_args.len; i++)
    strarray_push(&arr, ld_extra_args.data[i]);

  for (int i = 0; i < inputs->len; i++) strarray_push(&arr, inputs->data[i]);

  // Default libraries.
  if (opt_static) {
    strarray_push(&arr, "--start-group");
    strarray_push(&arr, "-lgcc");
    strarray_push(&arr, "-lgcc_eh");
    strarray_push(&arr, "-lc");
    strarray_push(&arr, "--end-group");
  } else {
    strarray_push(&arr, "-lc");
    strarray_push(&arr, "-lgcc");
    strarray_push(&arr, "--as-needed");
    strarray_push(&arr, "-lgcc_s");
    strarray_push(&arr, "--no-as-needed");
  }

  strarray_push(&arr, opt_shared
                          ? "/usr/lib/gcc/x86_64-linux-gnu/9/crtendS.o"
                          : "/usr/lib/gcc/x86_64-linux-gnu/9/crtend.o");
  strarray_push(&arr, "/usr/lib/x86_64-linux-gnu/crtn.o");

  // run_subprocess() expects a NULL-terminated vector.
  strarray_push(&arr, NULL);

  run_subprocess(arr.data);
}
|
||||
|
||||
static FileType get_file_type(char *filename) {
|
||||
if (opt_x != FILE_NONE) return opt_x;
|
||||
|
||||
if (ends_with(filename, ".a")) return FILE_AR;
|
||||
if (ends_with(filename, ".so")) return FILE_DSO;
|
||||
if (ends_with(filename, ".o")) return FILE_OBJ;
|
||||
if (ends_with(filename, ".c")) return FILE_C;
|
||||
if (ends_with(filename, ".s")) return FILE_ASM;
|
||||
|
||||
error("<command line>: unknown file extension: %s", filename);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
atexit(cleanup);
|
||||
init_macros();
|
||||
parse_args(argc, argv);
|
||||
|
||||
if (opt_cc1) {
|
||||
add_default_include_paths(argv[0]);
|
||||
cc1();
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (input_paths.len > 1 && opt_o && (opt_c || opt_S | opt_E))
|
||||
error("cannot specify '-o' with '-c,' '-S' or '-E' with multiple files");
|
||||
|
||||
StringArray ld_args = {};
|
||||
|
||||
for (int i = 0; i < input_paths.len; i++) {
|
||||
char *input = input_paths.data[i];
|
||||
|
||||
if (!strncmp(input, "-l", 2)) {
|
||||
strarray_push(&ld_args, input);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strncmp(input, "-Wl,", 4)) {
|
||||
char *s = strdup(input + 4);
|
||||
char *arg = strtok(s, ",");
|
||||
while (arg) {
|
||||
strarray_push(&ld_args, arg);
|
||||
arg = strtok(NULL, ",");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
char *output;
|
||||
if (opt_o)
|
||||
output = opt_o;
|
||||
else if (opt_S)
|
||||
output = replace_extn(input, ".s");
|
||||
else
|
||||
output = replace_extn(input, ".o");
|
||||
|
||||
FileType type = get_file_type(input);
|
||||
|
||||
// Handle .o or .a
|
||||
if (type == FILE_OBJ || type == FILE_AR || type == FILE_DSO) {
|
||||
strarray_push(&ld_args, input);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle .s
|
||||
if (type == FILE_ASM) {
|
||||
if (!opt_S) assemble(input, output);
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(type == FILE_C);
|
||||
|
||||
// Just preprocess
|
||||
if (opt_E || opt_M) {
|
||||
run_cc1(argc, argv, input, NULL);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compile
|
||||
if (opt_S) {
|
||||
run_cc1(argc, argv, input, output);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compile and assemble
|
||||
if (opt_c) {
|
||||
char *tmp = create_tmpfile();
|
||||
run_cc1(argc, argv, input, tmp);
|
||||
assemble(tmp, output);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Compile, assemble and link
|
||||
char *tmp1 = create_tmpfile();
|
||||
char *tmp2 = create_tmpfile();
|
||||
run_cc1(argc, argv, input, tmp1);
|
||||
assemble(tmp1, tmp2);
|
||||
strarray_push(&ld_args, tmp2);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ld_args.len > 0) run_linker(&ld_args, opt_o ? opt_o : "a.out");
|
||||
return 0;
|
||||
}
|
474
third_party/chibicc/chibicc.h
vendored
Normal file
474
third_party/chibicc/chibicc.h
vendored
Normal file
|
@ -0,0 +1,474 @@
|
|||
#ifndef COSMOPOLITAN_THIRD_PARTY_CHIBICC_CHIBICC_H_
|
||||
#define COSMOPOLITAN_THIRD_PARTY_CHIBICC_CHIBICC_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define _POSIX_C_SOURCE 200809L
|
||||
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/popcnt.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/struct/stat.h"
|
||||
#include "libc/calls/weirdtypes.h"
|
||||
#include "libc/conv/conv.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
#include "libc/log/log.h"
|
||||
#include "libc/macros.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/stdio/temp.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/time/struct/tm.h"
|
||||
#include "libc/time/time.h"
|
||||
#include "libc/unicode/unicode.h"
|
||||
#include "libc/x/x.h"
|
||||
#include "third_party/gdtoa/gdtoa.h"
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wswitch"
|
||||
|
||||
#ifndef __GNUC__
|
||||
#define __attribute__(x)
|
||||
#endif
|
||||
|
||||
typedef struct Type Type;
|
||||
typedef struct Node Node;
|
||||
typedef struct Member Member;
|
||||
typedef struct Relocation Relocation;
|
||||
typedef struct Hideset Hideset;
|
||||
|
||||
//
|
||||
// strarray.c
|
||||
//
|
||||
|
||||
// Array of strings that is appended to with strarray_push(). A
// zero-initialized StringArray is a valid empty array.
typedef struct {
  char **data;   // the elements; data[0] .. data[len - 1] are in use
  int capacity;  // presumably the number of allocated slots — see strarray.c
  int len;       // number of elements in use
} StringArray;
|
||||
|
||||
void strarray_push(StringArray *arr, char *s);
|
||||
|
||||
//
|
||||
// tokenize.c
|
||||
//
|
||||
|
||||
// Kinds of token.
typedef enum {
  TK_RESERVED = 0,  // keyword or punctuator
  TK_IDENT,         // identifier
  TK_STR,           // string literal
  TK_NUM,           // numeric literal
  TK_PP_NUM,        // preprocessing number
  TK_EOF,           // end-of-file marker
} TokenKind;
|
||||
|
||||
// An input file.
typedef struct {
  char *name;      // file name
  int file_no;     // number given to the file (see new_file)
  char *contents;  // text of the file

  // State for the #line directive
  char *display_name;
  int line_delta;
} File;
|
||||
|
||||
// Token type
|
||||
typedef struct Token Token;
|
||||
struct Token {
|
||||
TokenKind kind; // Token kind
|
||||
Token *next; // Next token
|
||||
int64_t val; // If kind is TK_NUM, its value
|
||||
long double fval; // If kind is TK_NUM, its value
|
||||
char *loc; // Token location
|
||||
int len; // Token length
|
||||
Type *ty; // Used if TK_NUM or TK_STR
|
||||
char *str; // String literal contents including terminating '\0'
|
||||
|
||||
File *file; // Source location
|
||||
char *filename; // Filename
|
||||
int line_no; // Line number
|
||||
int line_delta; // Line number
|
||||
bool at_bol; // True if this token is at beginning of line
|
||||
bool has_space; // True if this token follows a space character
|
||||
Hideset *hideset; // For macro expansion
|
||||
Token *origin; // If this is expanded from a macro, the original token
|
||||
};
|
||||
|
||||
noreturn void error(char *fmt, ...) __attribute__((format(printf, 1, 2)));
|
||||
noreturn void error_at(char *loc, char *fmt, ...)
|
||||
__attribute__((format(printf, 2, 3)));
|
||||
noreturn void error_tok(Token *tok, char *fmt, ...)
|
||||
__attribute__((format(printf, 2, 3)));
|
||||
void warn_tok(Token *tok, char *fmt, ...) __attribute__((format(printf, 2, 3)));
|
||||
bool equal(Token *tok, char *op);
|
||||
Token *skip(Token *tok, char *op);
|
||||
bool consume(Token **rest, Token *tok, char *str);
|
||||
void convert_pp_tokens(Token *tok);
|
||||
File **get_input_files(void);
|
||||
File *new_file(char *name, int file_no, char *contents);
|
||||
Token *tokenize_string_literal(Token *tok, Type *basety);
|
||||
Token *tokenize(File *file);
|
||||
Token *tokenize_file(char *filename);
|
||||
|
||||
#define UNREACHABLE() error("internal error at %s:%d", __FILE__, __LINE__)
|
||||
|
||||
//
|
||||
// preprocess.c
|
||||
//
|
||||
|
||||
char *format(char *fmt, ...);
|
||||
char *search_include_paths(char *filename);
|
||||
bool file_exists(char *path);
|
||||
void init_macros(void);
|
||||
void define_macro(char *name, char *buf);
|
||||
void undef_macro(char *name);
|
||||
Token *preprocess(Token *tok);
|
||||
|
||||
//
|
||||
// parse.c
|
||||
//
|
||||
|
||||
// Variable or function
|
||||
typedef struct Obj Obj;
|
||||
struct Obj {
|
||||
Obj *next;
|
||||
char *name; // Variable name
|
||||
Type *ty; // Type
|
||||
Token *tok; // representative token
|
||||
bool is_local; // local or global/function
|
||||
int align; // alignment
|
||||
|
||||
// Local variable
|
||||
int offset;
|
||||
|
||||
// Global variable or function
|
||||
bool is_function;
|
||||
bool is_definition;
|
||||
bool is_static;
|
||||
|
||||
// Global variable
|
||||
bool is_tentative;
|
||||
bool is_tls;
|
||||
char *init_data;
|
||||
Relocation *rel;
|
||||
|
||||
// Function
|
||||
bool is_inline;
|
||||
Obj *params;
|
||||
Node *body;
|
||||
Obj *locals;
|
||||
Obj *va_area;
|
||||
Obj *alloca_bottom;
|
||||
int stack_size;
|
||||
|
||||
// Static inline function
|
||||
bool is_live;
|
||||
bool is_root;
|
||||
StringArray refs;
|
||||
};
|
||||
|
||||
// A global variable is initialized either by a constant expression or by
// a pointer to another global variable. A Relocation describes the
// second case. Relocations are chained through `next`.
typedef struct Relocation Relocation;
struct Relocation {
  Relocation *next;
  int offset;
  char **label;
  long addend;
};
|
||||
|
||||
// Kinds of AST node.
typedef enum {
  ND_NULL_EXPR = 0,  // do nothing

  // Arithmetic and bitwise operators
  ND_ADD,     // +
  ND_SUB,     // -
  ND_MUL,     // *
  ND_DIV,     // /
  ND_NEG,     // unary -
  ND_MOD,     // %
  ND_BITAND,  // &
  ND_BITOR,   // |
  ND_BITXOR,  // ^
  ND_SHL,     // <<
  ND_SHR,     // >>

  // Comparisons
  ND_EQ,  // ==
  ND_NE,  // !=
  ND_LT,  // <
  ND_LE,  // <=

  // Other operators
  ND_ASSIGN,  // =
  ND_COND,    // ?:
  ND_COMMA,   // ,
  ND_MEMBER,  // . (struct member access)
  ND_ADDR,    // unary &
  ND_DEREF,   // unary *
  ND_NOT,     // !
  ND_BITNOT,  // ~
  ND_LOGAND,  // &&
  ND_LOGOR,   // ||

  // Statements
  ND_RETURN,     // "return"
  ND_IF,         // "if"
  ND_FOR,        // "for" or "while"
  ND_DO,         // "do"
  ND_SWITCH,     // "switch"
  ND_CASE,       // "case"
  ND_BLOCK,      // { ... }
  ND_GOTO,       // "goto"
  ND_GOTO_EXPR,  // "goto" with labels-as-values
  ND_LABEL,      // labeled statement
  ND_LABEL_VAL,  // [GNU] labels-as-values

  // Everything else
  ND_FUNCALL,    // function call
  ND_EXPR_STMT,  // expression statement
  ND_STMT_EXPR,  // statement expression
  ND_VAR,        // variable
  ND_VLA_PTR,    // VLA designator
  ND_NUM,        // integer
  ND_CAST,       // type cast
  ND_MEMZERO,    // zero-clear a stack variable
  ND_ASM,        // "asm"
  ND_CAS,        // atomic compare-and-swap
  ND_EXCH,       // atomic exchange
} NodeKind;
|
||||
|
||||
// AST node type
|
||||
struct Node {
|
||||
NodeKind kind; // Node kind
|
||||
Node *next; // Next node
|
||||
Type *ty; // Type, e.g. int or pointer to int
|
||||
Token *tok; // Representative token
|
||||
|
||||
Node *lhs; // Left-hand side
|
||||
Node *rhs; // Right-hand side
|
||||
|
||||
// "if" or "for" statement
|
||||
Node *cond;
|
||||
Node *then;
|
||||
Node *els;
|
||||
Node *init;
|
||||
Node *inc;
|
||||
|
||||
// "break" and "continue" labels
|
||||
char *brk_label;
|
||||
char *cont_label;
|
||||
|
||||
// Block or statement expression
|
||||
Node *body;
|
||||
|
||||
// Struct member access
|
||||
Member *member;
|
||||
|
||||
// Function call
|
||||
Type *func_ty;
|
||||
Node *args;
|
||||
bool pass_by_stack;
|
||||
Obj *ret_buffer;
|
||||
|
||||
// Goto or labeled statement, or labels-as-values
|
||||
char *label;
|
||||
char *unique_label;
|
||||
Node *goto_next;
|
||||
|
||||
// Switch
|
||||
Node *case_next;
|
||||
Node *default_case;
|
||||
|
||||
// Case
|
||||
long begin;
|
||||
long end;
|
||||
|
||||
// "asm" string literal
|
||||
char *asm_str;
|
||||
|
||||
// Atomic compare-and-swap
|
||||
Node *cas_addr;
|
||||
Node *cas_old;
|
||||
Node *cas_new;
|
||||
|
||||
// Atomic op= operators
|
||||
Obj *atomic_addr;
|
||||
Node *atomic_expr;
|
||||
|
||||
// Variable
|
||||
Obj *var;
|
||||
|
||||
// Numeric literal
|
||||
int64_t val;
|
||||
long double fval;
|
||||
};
|
||||
|
||||
Node *new_cast(Node *expr, Type *ty);
|
||||
int64_t const_expr(Token **rest, Token *tok);
|
||||
Obj *parse(Token *tok);
|
||||
|
||||
//
|
||||
// type.c
|
||||
//
|
||||
|
||||
// Kinds of type.
typedef enum {
  TY_VOID = 0,
  TY_BOOL,
  TY_CHAR,
  TY_SHORT,
  TY_INT,
  TY_LONG,
  TY_FLOAT,
  TY_DOUBLE,
  TY_LDOUBLE,
  TY_ENUM,
  TY_PTR,
  TY_FUNC,
  TY_ARRAY,
  TY_VLA,  // variable-length array
  TY_STRUCT,
  TY_UNION,
} TypeKind;
|
||||
|
||||
struct Type {
|
||||
TypeKind kind;
|
||||
int size; // sizeof() value
|
||||
int align; // alignment
|
||||
bool is_unsigned; // unsigned or signed
|
||||
bool is_atomic; // true if _Atomic
|
||||
Type *origin; // for type compatibility check
|
||||
|
||||
// Pointer-to or array-of type. We intentionally use the same member
|
||||
// to represent pointer/array duality in C.
|
||||
//
|
||||
// In many contexts in which a pointer is expected, we examine this
|
||||
// member instead of "kind" member to determine whether a type is a
|
||||
// pointer or not. That means in many contexts "array of T" is
|
||||
// naturally handled as if it were "pointer to T", as required by
|
||||
// the C spec.
|
||||
Type *base;
|
||||
|
||||
// Declaration
|
||||
Token *name;
|
||||
Token *name_pos;
|
||||
|
||||
// Array
|
||||
int array_len;
|
||||
|
||||
// Variable-length array
|
||||
Node *vla_len; // # of elements
|
||||
Obj *vla_size; // sizeof() value
|
||||
|
||||
// Struct
|
||||
Member *members;
|
||||
bool is_flexible;
|
||||
bool is_packed;
|
||||
|
||||
// Function type
|
||||
Type *return_ty;
|
||||
Type *params;
|
||||
bool is_variadic;
|
||||
Type *next;
|
||||
};
|
||||
|
||||
// Struct member
// Members are chained through `next`; `struct Type` refers to the chain
// via its `members` pointer.
struct Member {
|
||||
Member *next;
|
||||
Type *ty;
|
||||
Token *tok; // for error message
|
||||
Token *name;
|
||||
int idx;
|
||||
int align;
|
||||
int offset;
|
||||
|
||||
// Bitfield
|
||||
bool is_bitfield;
|
||||
int bit_offset;
|
||||
int bit_width;
|
||||
};
|
||||
|
||||
extern Type *ty_void;
|
||||
extern Type *ty_bool;
|
||||
|
||||
extern Type *ty_char;
|
||||
extern Type *ty_short;
|
||||
extern Type *ty_int;
|
||||
extern Type *ty_long;
|
||||
|
||||
extern Type *ty_uchar;
|
||||
extern Type *ty_ushort;
|
||||
extern Type *ty_uint;
|
||||
extern Type *ty_ulong;
|
||||
|
||||
extern Type *ty_float;
|
||||
extern Type *ty_double;
|
||||
extern Type *ty_ldouble;
|
||||
|
||||
bool is_integer(Type *ty);
|
||||
bool is_flonum(Type *ty);
|
||||
bool is_numeric(Type *ty);
|
||||
bool is_compatible(Type *t1, Type *t2);
|
||||
Type *copy_type(Type *ty);
|
||||
Type *pointer_to(Type *base);
|
||||
Type *func_type(Type *return_ty);
|
||||
Type *array_of(Type *base, int size);
|
||||
Type *vla_of(Type *base, Node *expr);
|
||||
Type *enum_type(void);
|
||||
Type *struct_type(void);
|
||||
void add_type(Node *node);
|
||||
|
||||
//
|
||||
// codegen.c
|
||||
//
|
||||
|
||||
void codegen(Obj *prog, FILE *out);
|
||||
int align_to(int n, int align);
|
||||
|
||||
//
|
||||
// unicode.c
|
||||
//
|
||||
|
||||
int encode_utf8(char *buf, uint32_t c);
|
||||
uint32_t decode_utf8(char **new_pos, char *p);
|
||||
bool is_ident1(uint32_t c);
|
||||
bool is_ident2(uint32_t c);
|
||||
int str_width(char *p, int len);
|
||||
|
||||
//
|
||||
// hashmap.c
|
||||
//
|
||||
|
||||
// One slot of a HashMap bucket array.
typedef struct {
|
||||
char *key; // hashmap.c treats NULL as an empty slot and (void *)-1 as a deleted one
|
||||
int keylen; // number of key bytes compared and hashed
|
||||
void *val; // value associated with key
|
||||
} HashEntry;
|
||||
|
||||
// Hash table used through the hashmap_* functions declared below.
// A zero-initialized HashMap is valid; hashmap.c allocates `buckets`
// on the first insertion.
typedef struct {
|
||||
HashEntry *buckets; // slot array, NULL until the first insertion
|
||||
int capacity; // number of slots in buckets
|
||||
int used; // slots that stopped being empty; deleting does not decrement it
|
||||
} HashMap;
|
||||
|
||||
void *hashmap_get(HashMap *map, char *key);
|
||||
void *hashmap_get2(HashMap *map, char *key, int keylen);
|
||||
void hashmap_put(HashMap *map, char *key, void *val);
|
||||
void hashmap_put2(HashMap *map, char *key, int keylen, void *val);
|
||||
void hashmap_delete(HashMap *map, char *key);
|
||||
void hashmap_delete2(HashMap *map, char *key, int keylen);
|
||||
void hashmap_test(void);
|
||||
|
||||
//
|
||||
// main.c
|
||||
//
|
||||
|
||||
extern StringArray include_paths;
|
||||
extern bool opt_fpic;
|
||||
extern bool opt_fcommon;
|
||||
extern char *base_file;
|
||||
|
||||
// Singly linked list node; the list head is the `staticasms` global
// declared below.
// NOTE(review): presumably one node per file-scope asm statement - confirm.
typedef struct StaticAsm {
|
||||
struct StaticAsm *next; // next node in the list
|
||||
Node *body; // AST node attached to this entry
|
||||
} StaticAsm;
|
||||
|
||||
extern struct StaticAsm *staticasms;
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_THIRD_PARTY_CHIBICC_CHIBICC_H_ */
|
78
third_party/chibicc/chibicc.mk
vendored
Normal file
78
third_party/chibicc/chibicc.mk
vendored
Normal file
|
@ -0,0 +1,78 @@
|
|||
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
|
||||
#───vi: set et ft=make ts=8 tw=8 fenc=utf-8 :vi───────────────────────┘
|
||||
|
||||
# Build rules for the chibicc package: one static archive built from every
# .S and .c file in third_party/chibicc/, plus the chibicc.com binary.
PKGS += THIRD_PARTY_CHIBICC
|
||||
|
||||
THIRD_PARTY_CHIBICC_ARTIFACTS += THIRD_PARTY_CHIBICC_A
|
||||
THIRD_PARTY_CHIBICC = $(THIRD_PARTY_CHIBICC_A_DEPS) $(THIRD_PARTY_CHIBICC_A)
|
||||
THIRD_PARTY_CHIBICC_A = o/$(MODE)/third_party/chibicc/chibicc.a
|
||||
THIRD_PARTY_CHIBICC_A_FILES := $(wildcard third_party/chibicc/*)
|
||||
THIRD_PARTY_CHIBICC_A_HDRS = $(filter %.h,$(THIRD_PARTY_CHIBICC_A_FILES))
|
||||
THIRD_PARTY_CHIBICC_A_SRCS_S = $(filter %.S,$(THIRD_PARTY_CHIBICC_A_FILES))
|
||||
THIRD_PARTY_CHIBICC_A_SRCS_C = $(filter %.c,$(THIRD_PARTY_CHIBICC_A_FILES))
|
||||
|
||||
THIRD_PARTY_CHIBICC_BINS = \
|
||||
o/$(MODE)/third_party/chibicc/chibicc.com
|
||||
|
||||
THIRD_PARTY_CHIBICC_A_SRCS = \
|
||||
$(THIRD_PARTY_CHIBICC_A_SRCS_S) \
|
||||
$(THIRD_PARTY_CHIBICC_A_SRCS_C)
|
||||
|
||||
# Each source contributes a .zip.o object in addition to its compiled .o.
THIRD_PARTY_CHIBICC_A_OBJS = \
|
||||
$(THIRD_PARTY_CHIBICC_A_SRCS:%=o/$(MODE)/%.zip.o) \
|
||||
$(THIRD_PARTY_CHIBICC_A_SRCS_S:%.S=o/$(MODE)/%.o) \
|
||||
$(THIRD_PARTY_CHIBICC_A_SRCS_C:%.c=o/$(MODE)/%.o)
|
||||
|
||||
THIRD_PARTY_CHIBICC_A_CHECKS = \
|
||||
$(THIRD_PARTY_CHIBICC_A).pkg \
|
||||
$(THIRD_PARTY_CHIBICC_A_HDRS:%=o/$(MODE)/%.ok)
|
||||
|
||||
# Packages this archive depends on directly; expanded into
# THIRD_PARTY_CHIBICC_A_DEPS below.
THIRD_PARTY_CHIBICC_A_DIRECTDEPS = \
|
||||
LIBC_STR \
|
||||
LIBC_STUBS \
|
||||
LIBC_FMT \
|
||||
LIBC_NEXGEN32E \
|
||||
LIBC_UNICODE \
|
||||
LIBC_STDIO \
|
||||
LIBC_MEM \
|
||||
LIBC_LOG \
|
||||
LIBC_CALLS \
|
||||
LIBC_CALLS_HEFTY \
|
||||
LIBC_TIME \
|
||||
LIBC_X \
|
||||
LIBC_CONV \
|
||||
LIBC_RUNTIME \
|
||||
THIRD_PARTY_GDTOA
|
||||
|
||||
THIRD_PARTY_CHIBICC_A_DEPS := \
|
||||
$(call uniq,$(foreach x,$(THIRD_PARTY_CHIBICC_A_DIRECTDEPS),$($(x))))
|
||||
|
||||
$(THIRD_PARTY_CHIBICC_A): \
|
||||
third_party/chibicc/ \
|
||||
$(THIRD_PARTY_CHIBICC_A).pkg \
|
||||
$(THIRD_PARTY_CHIBICC_A_OBJS)
|
||||
|
||||
$(THIRD_PARTY_CHIBICC_A).pkg: \
|
||||
$(THIRD_PARTY_CHIBICC_A_OBJS) \
|
||||
$(foreach x,$(THIRD_PARTY_CHIBICC_A_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
||||
# Link rule for any %.com.dbg in this directory; the recipe is $(APELINK).
o/$(MODE)/third_party/chibicc/%.com.dbg: \
|
||||
$(THIRD_PARTY_CHIBICC_A_DEPS) \
|
||||
$(THIRD_PARTY_CHIBICC_A) \
|
||||
o/$(MODE)/third_party/chibicc/%.o \
|
||||
$(THIRD_PARTY_CHIBICC_A).pkg \
|
||||
$(CRT) \
|
||||
$(APE)
|
||||
@$(APELINK)
|
||||
|
||||
# Aggregates over every artifact listed in THIRD_PARTY_CHIBICC_ARTIFACTS.
THIRD_PARTY_CHIBICC_LIBS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)))
|
||||
THIRD_PARTY_CHIBICC_SRCS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_SRCS))
|
||||
THIRD_PARTY_CHIBICC_HDRS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_HDRS))
|
||||
THIRD_PARTY_CHIBICC_CHECKS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_CHECKS))
|
||||
THIRD_PARTY_CHIBICC_OBJS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_OBJS))
|
||||
$(THIRD_PARTY_CHIBICC_OBJS): $(BUILD_FILES) third_party/chibicc/chibicc.mk
|
||||
|
||||
.PHONY: o/$(MODE)/third_party/chibicc
|
||||
o/$(MODE)/third_party/chibicc: \
|
||||
$(THIRD_PARTY_CHIBICC_BINS) \
|
||||
$(THIRD_PARTY_CHIBICC_CHECKS)
|
1590
third_party/chibicc/codegen.c
vendored
Normal file
1590
third_party/chibicc/codegen.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
130
third_party/chibicc/hashmap.c
vendored
Normal file
130
third_party/chibicc/hashmap.c
vendored
Normal file
|
@ -0,0 +1,130 @@
|
|||
// This is an implementation of the open-addressing hash table.
|
||||
|
||||
#include "third_party/chibicc/chibicc.h"
|
||||
|
||||
#define TOMBSTONE ((void *)-1) // Represents a deleted hash entry
|
||||
|
||||
// Hashes the first `len` bytes of `s` FNV-style: starting from the 64-bit
// offset basis, every byte is folded in by multiplying by the FNV prime
// and then XOR-ing the byte value. A non-positive `len` yields the basis.
static uint64_t fnv_hash(char *s, int len) {
  uint64_t h = 0xcbf29ce484222325;
  const unsigned char *byte = (const unsigned char *)s;
  for (int left = len; left > 0; left--) {
    h = (h * 0x100000001b3) ^ *byte++;
  }
  return h;
}
|
||||
|
||||
// Make room for new entires in a given hashmap by removing
|
||||
// tombstones and possibly extending the bucket size.
|
||||
static void rehash(HashMap *map) {
|
||||
// Compute the size of the new hashmap.
|
||||
int nkeys = 0;
|
||||
for (int i = 0; i < map->capacity; i++)
|
||||
if (map->buckets[i].key && map->buckets[i].key != TOMBSTONE) nkeys++;
|
||||
int cap = map->capacity;
|
||||
while ((nkeys * 100) / cap >= 50) cap = cap * 2;
|
||||
// Create a new hashmap and copy all key-values.
|
||||
HashMap map2 = {};
|
||||
map2.buckets = calloc(cap, sizeof(HashEntry));
|
||||
map2.capacity = cap;
|
||||
for (int i = 0; i < map->capacity; i++) {
|
||||
HashEntry *ent = &map->buckets[i];
|
||||
if (ent->key && ent->key != TOMBSTONE)
|
||||
hashmap_put2(&map2, ent->key, ent->keylen, ent->val);
|
||||
}
|
||||
assert(map2.used == nkeys);
|
||||
*map = map2;
|
||||
}
|
||||
|
||||
// Returns true if `ent` holds a live key (neither empty nor a tombstone)
// whose bytes are exactly the `keylen` bytes at `key`.
static bool match(HashEntry *ent, char *key, int keylen) {
  if (!ent->key || ent->key == TOMBSTONE) return false;
  if (ent->keylen != keylen) return false;
  return memcmp(ent->key, key, keylen) == 0;
}
|
||||
|
||||
// Looks `key` up by linear probing from its hash slot. Returns the
// matching entry, or NULL when the map has no buckets yet or the probe
// reaches an empty slot first.
static HashEntry *get_entry(HashMap *map, char *key, int keylen) {
  if (map->buckets == NULL) return NULL;

  uint64_t slot = fnv_hash(key, keylen);
  int remaining = map->capacity;
  while (remaining-- > 0) {
    HashEntry *ent = &map->buckets[slot++ % map->capacity];
    if (match(ent, key, keylen)) return ent;
    if (!ent->key) return NULL;
  }
  UNREACHABLE();
}
|
||||
|
||||
// Returns the entry for `key`, creating one if the key is not present.
//
// The bucket array is allocated on first use (16 slots) and rehashed once
// 70% of the slots have been used. A new key is stored in the first
// tombstone seen on its probe path if there is one, otherwise in the empty
// slot that ends the path. The caller is expected to set `val`.
static HashEntry *get_or_insert_entry(HashMap *map, char *key, int keylen) {
  if (!map->buckets) {
    map->buckets = calloc((map->capacity = 16), sizeof(HashEntry));
  }
  if ((map->used * 100) / map->capacity >= 70) rehash(map);

  uint64_t hash = fnv_hash(key, keylen);
  HashEntry *grave = NULL;  // first reusable tombstone on the probe path

  for (int i = 0; i < map->capacity; i++) {
    HashEntry *ent = &map->buckets[(hash + i) % map->capacity];

    if (match(ent, key, keylen)) return ent;

    if (ent->key == TOMBSTONE) {
      // Keep probing: the key may still live further along this path.
      // Reusing the tombstone right away would store the key twice, and a
      // later delete would then leave a stale copy visible.
      if (!grave) grave = ent;
      continue;
    }

    if (ent->key == NULL) {
      // End of the probe path, so the key is definitely absent.
      if (grave) {
        ent = grave;  // reuse a deleted slot; `used` already counts it
      } else {
        map->used++;
      }
      ent->key = key;
      ent->keylen = keylen;
      return ent;
    }
  }

  // Every slot was probed without meeting an empty one.
  if (grave) {
    grave->key = key;
    grave->keylen = keylen;
    return grave;
  }
  UNREACHABLE();
}
|
||||
|
||||
// Returns the value stored for the NUL-terminated `key`, or NULL if the
// key is absent.
void *hashmap_get(HashMap *map, char *key) {
  int keylen = strlen(key);
  return hashmap_get2(map, key, keylen);
}
|
||||
|
||||
// Returns the value stored for the `keylen`-byte `key`, or NULL if the
// key is absent.
void *hashmap_get2(HashMap *map, char *key, int keylen) {
  HashEntry *hit = get_entry(map, key, keylen);
  if (hit == NULL) return NULL;
  return hit->val;
}
|
||||
|
||||
// Stores `val` under the NUL-terminated `key`, replacing any value
// already stored for it.
void hashmap_put(HashMap *map, char *key, void *val) {
  int keylen = strlen(key);
  hashmap_put2(map, key, keylen, val);
}
|
||||
|
||||
// Stores `val` under the `keylen`-byte `key`. The map keeps the `key`
// pointer itself rather than a copy of the bytes.
void hashmap_put2(HashMap *map, char *key, int keylen, void *val) {
  get_or_insert_entry(map, key, keylen)->val = val;
}
|
||||
|
||||
// Removes the NUL-terminated `key` from the map, if present.
void hashmap_delete(HashMap *map, char *key) {
  int keylen = strlen(key);
  hashmap_delete2(map, key, keylen);
}
|
||||
|
||||
// Removes the `keylen`-byte `key` from the map, if present, by turning
// its slot into a tombstone so that probe paths through it stay intact.
void hashmap_delete2(HashMap *map, char *key, int keylen) {
  HashEntry *hit = get_entry(map, key, keylen);
  if (hit == NULL) return;
  hit->key = TOMBSTONE;
}
|
||||
|
||||
void hashmap_test(void) {
|
||||
HashMap *map = calloc(1, sizeof(HashMap));
|
||||
for (int i = 0; i < 5000; i++)
|
||||
hashmap_put(map, format("key %d", i), (void *)(size_t)i);
|
||||
for (int i = 1000; i < 2000; i++) hashmap_delete(map, format("key %d", i));
|
||||
for (int i = 1500; i < 1600; i++)
|
||||
hashmap_put(map, format("key %d", i), (void *)(size_t)i);
|
||||
for (int i = 6000; i < 7000; i++)
|
||||
hashmap_put(map, format("key %d", i), (void *)(size_t)i);
|
||||
for (int i = 0; i < 1000; i++)
|
||||
assert((size_t)hashmap_get(map, format("key %d", i)) == i);
|
||||
for (int i = 1000; i < 1500; i++)
|
||||
assert(hashmap_get(map, "no such key") == NULL);
|
||||
for (int i = 1500; i < 1600; i++)
|
||||
assert((size_t)hashmap_get(map, format("key %d", i)) == i);
|
||||
for (int i = 1600; i < 2000; i++)
|
||||
assert(hashmap_get(map, "no such key") == NULL);
|
||||
for (int i = 2000; i < 5000; i++)
|
||||
assert((size_t)hashmap_get(map, format("key %d", i)) == i);
|
||||
for (int i = 5000; i < 6000; i++)
|
||||
assert(hashmap_get(map, "no such key") == NULL);
|
||||
for (int i = 6000; i < 7000; i++)
|
||||
hashmap_put(map, format("key %d", i), (void *)(size_t)i);
|
||||
assert(hashmap_get(map, "no such key") == NULL);
|
||||
printf("OK\n");
|
||||
}
|
3301
third_party/chibicc/parse.c
vendored
Normal file
3301
third_party/chibicc/parse.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
1099
third_party/chibicc/preprocess.c
vendored
Normal file
1099
third_party/chibicc/preprocess.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
16
third_party/chibicc/strarray.c
vendored
Normal file
16
third_party/chibicc/strarray.c
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
#include "third_party/chibicc/chibicc.h"
|
||||
|
||||
// Appends `s` to `arr`. Storage starts at 8 slots and doubles whenever it
// is full; slots added by growth are set to NULL.
void strarray_push(StringArray *arr, char *s) {
  if (arr->data == NULL) {
    arr->capacity = 8;
    arr->data = calloc(8, sizeof(char *));
  }

  if (arr->len == arr->capacity) {
    arr->data = realloc(arr->data, sizeof(char *) * arr->capacity * 2);
    arr->capacity *= 2;
    int i = arr->len;
    while (i < arr->capacity) arr->data[i++] = NULL;
  }

  arr->data[arr->len] = s;
  arr->len += 1;
}
|
785
third_party/chibicc/tokenize.c
vendored
Normal file
785
third_party/chibicc/tokenize.c
vendored
Normal file
|
@ -0,0 +1,785 @@
|
|||
#include "third_party/chibicc/chibicc.h"
|
||||
|
||||
// Input file
|
||||
static File *current_file;
|
||||
|
||||
// A list of all input files.
|
||||
static File **input_files;
|
||||
|
||||
// True if the current position is at the beginning of a line
|
||||
static bool at_bol;
|
||||
|
||||
// True if the current position follows a space character
|
||||
static bool has_space;
|
||||
|
||||
// Prints the printf-style message and a newline to stderr, then exits
// with status 1.
void error(char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);  // pair every va_start with va_end, even before exit()
  fprintf(stderr, "\n");
  exit(1);
}
|
||||
|
||||
// Prints a diagnostic to stderr in the following format:
//
// foo.c:10: x = y + 1;
//               ^ <error message here>
//
// `input` is the start of the file contents and `loc` points into it at
// the position the caret should mark.
static void verror_at(char *filename, char *input, int line_no, char *loc,
                      char *fmt, va_list ap) {
  // Locate the bounds of the line that contains `loc`.
  char *bol = loc;
  while (bol > input && bol[-1] != '\n') bol--;

  char *eol = loc;
  while (*eol != '\0' && *eol != '\n') eol++;

  // Echo the source line, prefixed by "file:line: ".
  int prefix = fprintf(stderr, "%s:%d: ", filename, line_no);
  fprintf(stderr, "%.*s\n", (int)(eol - bol), bol);

  // Pad up to the display column of `loc`, then print the caret and message.
  int col = str_width(bol, loc - bol) + prefix;
  fprintf(stderr, "%*s", col, "");
  fputs("^ ", stderr);
  vfprintf(stderr, fmt, ap);
  fputc('\n', stderr);
}
|
||||
|
||||
void error_at(char *loc, char *fmt, ...) {
|
||||
int line_no = 1;
|
||||
for (char *p = current_file->contents; p < loc; p++)
|
||||
if (*p == '\n') line_no++;
|
||||
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
verror_at(current_file->name, current_file->contents, line_no, loc, fmt, ap);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Reports an error at the source position recorded in `tok`, then exits
// with status 1.
void error_tok(Token *tok, char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  verror_at(tok->file->name, tok->file->contents, tok->line_no, tok->loc, fmt,
            ap);
  va_end(ap);  // pair every va_start with va_end, even before exit()
  exit(1);
}
|
||||
|
||||
// Prints a diagnostic at the source position recorded in `tok` and
// returns to the caller, unlike error_tok, which exits.
void warn_tok(Token *tok, char *fmt, ...) {
  va_list ap;
  va_start(ap, fmt);
  verror_at(tok->file->name, tok->file->contents, tok->line_no, tok->loc, fmt,
            ap);
  // This function returns, so va_end is required here: returning after
  // va_start without it is undefined behavior.
  va_end(ap);
}
|
||||
|
||||
// Consumes the current token if it matches `op`.
|
||||
bool equal(Token *tok, char *op) {
|
||||
return strlen(op) == tok->len && !strncmp(tok->loc, op, tok->len);
|
||||
}
|
||||
|
||||
// Ensure that the current token is `op`.
|
||||
Token *skip(Token *tok, char *op) {
|
||||
if (!equal(tok, op)) error_tok(tok, "expected '%s'", op);
|
||||
return tok->next;
|
||||
}
|
||||
|
||||
// If `tok` is `str`, stores the following token in `*rest` and returns
// true; otherwise stores `tok` itself in `*rest` and returns false.
bool consume(Token **rest, Token *tok, char *str) {
  bool hit = equal(tok, str);
  *rest = hit ? tok->next : tok;
  return hit;
}
|
||||
|
||||
// Create a new token and add it as the next token of `cur`.
|
||||
static Token *new_token(TokenKind kind, char *start, char *end) {
|
||||
Token *tok = calloc(1, sizeof(Token));
|
||||
tok->kind = kind;
|
||||
tok->loc = start;
|
||||
tok->len = end - start;
|
||||
tok->file = current_file;
|
||||
tok->filename = current_file->display_name;
|
||||
tok->at_bol = at_bol;
|
||||
tok->has_space = has_space;
|
||||
|
||||
at_bol = has_space = false;
|
||||
return tok;
|
||||
}
|
||||
|
||||
// Returns true if the string `p` begins with the string `q`.
static bool starts_with(char *p, char *q) {
  while (*q != '\0') {
    if (*p++ != *q++) return false;
  }
  return true;
}
|
||||
|
||||
// Scans an identifier starting at `p` and returns a pointer just past its
// last character.
//
// Returns NULL if `p` does not start with a valid identifier character.
static char *read_ident(char *p) {
  char *next;

  if (!is_ident1(decode_utf8(&next, p))) return NULL;
  p = next;

  while (is_ident2(decode_utf8(&next, p))) p = next;
  return p;
}
|
||||
|
||||
// Converts a hexadecimal digit character to its value. Characters outside
// '0'-'9' and 'a'-'f' are treated as uppercase digits without validation.
static int from_hex(char c) {
  int zero;  // character code that would map to the value 0
  if ('0' <= c && c <= '9')
    zero = '0';
  else if ('a' <= c && c <= 'f')
    zero = 'a' - 10;
  else
    zero = 'A' - 10;
  return c - zero;
}
|
||||
|
||||
// Returns true if the text of `tok` is one of the reserved words below.
// The lookup table is built on the first call and reused afterwards.
static bool is_keyword(Token *tok) {
  static HashMap map;
  static char *kw[] = {
      "return",    "if",         "else",
      "for",       "while",      "int",
      "sizeof",    "char",       "struct",
      "union",     "short",      "long",
      "void",      "typedef",    "_Bool",
      "enum",      "static",     "goto",
      "break",     "continue",   "switch",
      "case",      "default",    "extern",
      "_Alignof",  "_Alignas",   "do",
      "signed",    "unsigned",   "const",
      "volatile",  "auto",       "register",
      "restrict",  "__restrict", "__restrict__",
      "_Noreturn", "float",      "double",
      "typeof",    "asm",        "_Thread_local",
      "__thread",  "_Atomic",    "__attribute__",
  };

  if (map.capacity == 0) {
    char **stop = kw + sizeof(kw) / sizeof(*kw);
    for (char **k = kw; k < stop; k++) hashmap_put(&map, *k, (void *)1);
  }

  return hashmap_get2(&map, tok->loc, tok->len) != NULL;
}
|
||||
|
||||
// Decodes one escape sequence. `p` points at the character right after
// the backslash; `*new_pos` receives the position just past the sequence.
// Handles up to three octal digits, \x followed by any number of hex
// digits, the usual single-letter escapes and the GNU \e. Any other
// character stands for itself.
static int read_escaped_char(char **new_pos, char *p) {
  if ('0' <= *p && *p <= '7') {
    // Read an octal number.
    int c = *p++ - '0';
    if ('0' <= *p && *p <= '7') {
      c = (c << 3) + (*p++ - '0');
      if ('0' <= *p && *p <= '7') c = (c << 3) + (*p++ - '0');
    }
    *new_pos = p;
    return c;
  }

  if (*p == 'x') {
    // Read a hexadecimal number. The casts keep bytes >= 0x80 (e.g. UTF-8)
    // from reaching isxdigit() as negative values, which is undefined.
    p++;
    if (!isxdigit((unsigned char)*p)) error_at(p, "invalid hex escape sequence");

    int c = 0;
    for (; isxdigit((unsigned char)*p); p++) c = (c << 4) + from_hex(*p);
    *new_pos = p;
    return c;
  }

  *new_pos = p + 1;

  switch (*p) {
    case 'a':
      return '\a';
    case 'b':
      return '\b';
    case 't':
      return '\t';
    case 'n':
      return '\n';
    case 'v':
      return '\v';
    case 'f':
      return '\f';
    case 'r':
      return '\r';
    // [GNU] \e for the ASCII escape character is a GNU C extension.
    case 'e':
      return 27;
    default:
      return *p;
  }
}
|
||||
|
||||
// Returns a pointer to the double-quote that closes the string literal
// whose contents start at `p`. A backslash hides the character after it.
// Reports an error if the input ends first.
static char *string_literal_end(char *p) {
  char *start = p;
  while (*p != '"') {
    if (*p == '\0') error_at(start, "unclosed string literal");
    p += (*p == '\\') ? 2 : 1;
  }
  return p;
}
|
||||
|
||||
static Token *read_string_literal(char *start, char *quote) {
|
||||
char *end = string_literal_end(quote + 1);
|
||||
char *buf = calloc(1, end - quote);
|
||||
int len = 0;
|
||||
|
||||
for (char *p = quote + 1; p < end;) {
|
||||
if (*p == '\\')
|
||||
buf[len++] = read_escaped_char(&p, p + 1);
|
||||
else
|
||||
buf[len++] = *p++;
|
||||
}
|
||||
|
||||
Token *tok = new_token(TK_STR, start, end + 1);
|
||||
tok->ty = array_of(ty_char, len + 1);
|
||||
tok->str = buf;
|
||||
return tok;
|
||||
}
|
||||
|
||||
// Read a UTF-8-encoded string literal and transcode it in UTF-16.
|
||||
//
|
||||
// UTF-16 is yet another variable-width encoding for Unicode. Code
|
||||
// points smaller than U+10000 are encoded in 2 bytes. Code points
|
||||
// equal to or larger than that are encoded in 4 bytes. Each 2 bytes
|
||||
// in the 4 byte sequence is called "surrogate", and a 4 byte sequence
|
||||
// is called a "surrogate pair".
|
||||
static Token *read_utf16_string_literal(char *start, char *quote) {
|
||||
char *end = string_literal_end(quote + 1);
|
||||
uint16_t *buf = calloc(2, end - start - 1);
|
||||
int len = 0;
|
||||
|
||||
for (char *p = quote + 1; p < end;) {
|
||||
if (*p == '\\') {
|
||||
buf[len++] = read_escaped_char(&p, p + 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t c = decode_utf8(&p, p);
|
||||
if (c < 0x10000) {
|
||||
// Encode a code point in 2 bytes.
|
||||
buf[len++] = c;
|
||||
} else {
|
||||
// Encode a code point in 4 bytes.
|
||||
c -= 0x10000;
|
||||
buf[len++] = 0xd800 + ((c >> 10) & 0x3ff);
|
||||
buf[len++] = 0xdc00 + (c & 0x3ff);
|
||||
}
|
||||
}
|
||||
|
||||
Token *tok = new_token(TK_STR, start, end + 1);
|
||||
tok->ty = array_of(ty_ushort, len + 1);
|
||||
tok->str = (char *)buf;
|
||||
return tok;
|
||||
}
|
||||
|
||||
// Read a UTF-8-encoded string literal and transcode it in UTF-32.
|
||||
//
|
||||
// UTF-32 is a fixed-width encoding for Unicode. Each code point is
|
||||
// encoded in 4 bytes.
|
||||
static Token *read_utf32_string_literal(char *start, char *quote, Type *ty) {
|
||||
char *end = string_literal_end(quote + 1);
|
||||
uint32_t *buf = calloc(4, end - quote);
|
||||
int len = 0;
|
||||
|
||||
for (char *p = quote + 1; p < end;) {
|
||||
if (*p == '\\')
|
||||
buf[len++] = read_escaped_char(&p, p + 1);
|
||||
else
|
||||
buf[len++] = decode_utf8(&p, p);
|
||||
}
|
||||
|
||||
Token *tok = new_token(TK_STR, start, end + 1);
|
||||
tok->ty = array_of(ty, len + 1);
|
||||
tok->str = (char *)buf;
|
||||
return tok;
|
||||
}
|
||||
|
||||
// Reads a character literal. `start` is where the token begins (including
// any prefix), `quote` is its opening single-quote and `ty` becomes the
// token's type. The value is the first character or escape sequence; the
// token extends to the next single-quote found after it.
static Token *read_char_literal(char *start, char *quote, Type *ty) {
  char *p = quote + 1;
  if (!*p) error_at(start, "unclosed char literal");

  int c = (*p == '\\') ? read_escaped_char(&p, p + 1) : (int)decode_utf8(&p, p);

  char *close = strchr(p, '\'');
  if (close == NULL) error_at(p, "unclosed char literal");

  Token *tok = new_token(TK_NUM, start, close + 1);
  tok->ty = ty;
  tok->val = c;
  return tok;
}
|
||||
|
||||
// Tries to interpret the pp-number `tok` as an integer constant (binary,
// octal, decimal or hexadecimal, with optional U/L/LL suffixes). On
// success the token becomes a TK_NUM with `val` and `ty` set and true is
// returned. If the digits and suffix do not cover the whole token, the
// token is left untouched and false is returned.
static bool convert_pp_int(Token *tok) {
  char *p = tok->loc;

  // Work out the radix and skip a "0x"/"0b" prefix.
  int base;
  if (!strncasecmp(p, "0x", 2) && isxdigit(p[2])) {
    base = 16;
    p += 2;
  } else if (!strncasecmp(p, "0b", 2) && (p[2] == '0' || p[2] == '1')) {
    base = 2;
    p += 2;
  } else {
    base = (*p == '0') ? 8 : 10;
  }

  int64_t val = strtoul(p, &p, base);

  // Read U, L or LL suffixes.
  bool l = false;
  bool u = false;
  bool ll_first = starts_with(p, "LL") || starts_with(p, "ll");
  bool u_first = *p == 'U' || *p == 'u';

  if ((ll_first && (p[2] == 'U' || p[2] == 'u')) ||
      (u_first && (starts_with(p + 1, "LL") || starts_with(p + 1, "ll")))) {
    l = u = true;
    p += 3;
  } else if (!strncasecmp(p, "lu", 2) || !strncasecmp(p, "ul", 2)) {
    l = u = true;
    p += 2;
  } else if (ll_first) {
    l = true;
    p += 2;
  } else if (*p == 'L' || *p == 'l') {
    l = true;
    p++;
  } else if (u_first) {
    u = true;
    p++;
  }

  if (p != tok->loc + tok->len) return false;

  // Infer a type. Decimal constants without a U suffix stay signed;
  // other radixes may fall back to an unsigned type of the same width.
  Type *ty;
  if (l && u)
    ty = ty_ulong;
  else if (u)
    ty = (val >> 32) ? ty_ulong : ty_uint;
  else if (base == 10)
    ty = (l || (val >> 31)) ? ty_long : ty_int;
  else if (val >> 63)
    ty = ty_ulong;
  else if (l || (val >> 32))
    ty = ty_long;
  else
    ty = (val >> 31) ? ty_uint : ty_int;

  tok->kind = TK_NUM;
  tok->val = val;
  tok->ty = ty;
  return true;
}
|
||||
|
||||
// The definition of the numeric literal at the preprocessing stage
|
||||
// is more relaxed than the definition of that at the later stages.
|
||||
// In order to handle that, a numeric literal is tokenized as a
|
||||
// "pp-number" token first and then converted to a regular number
|
||||
// token after preprocessing.
|
||||
//
|
||||
// This function converts a pp-number token to a regular number token.
|
||||
static void convert_pp_number(Token *tok) {
|
||||
// Try to parse as an integer constant.
|
||||
if (convert_pp_int(tok)) return;
|
||||
|
||||
// If it's not an integer, it must be a floating point constant.
|
||||
char *end;
|
||||
long double val = strtold(tok->loc, &end);
|
||||
|
||||
Type *ty;
|
||||
if (*end == 'f' || *end == 'F') {
|
||||
ty = ty_float;
|
||||
end++;
|
||||
} else if (*end == 'l' || *end == 'L') {
|
||||
ty = ty_ldouble;
|
||||
end++;
|
||||
} else {
|
||||
ty = ty_double;
|
||||
}
|
||||
|
||||
if (tok->loc + tok->len != end) error_tok(tok, "invalid numeric constant");
|
||||
|
||||
tok->kind = TK_NUM;
|
||||
tok->fval = val;
|
||||
tok->ty = ty;
|
||||
}
|
||||
|
||||
// Walks the token list up to TK_EOF, marking keywords as TK_RESERVED and
// converting pp-numbers into regular number tokens.
void convert_pp_tokens(Token *tok) {
  Token *t = tok;
  while (t->kind != TK_EOF) {
    if (is_keyword(t)) {
      t->kind = TK_RESERVED;
    } else if (t->kind == TK_PP_NUM) {
      convert_pp_number(t);
    }
    t = t->next;
  }
}
|
||||
|
||||
// Initialize line info for all tokens.
|
||||
static void add_line_numbers(Token *tok) {
|
||||
char *p = current_file->contents;
|
||||
int n = 1;
|
||||
|
||||
do {
|
||||
if (p == tok->loc) {
|
||||
tok->line_no = n;
|
||||
tok = tok->next;
|
||||
}
|
||||
if (*p == '\n') n++;
|
||||
} while (*p++);
|
||||
}
|
||||
|
||||
// Re-reads the string literal at `tok->loc` with elements of type
// `basety`: UTF-16 when the element size is 2, UTF-32 otherwise. The new
// token takes over `tok`'s successor and is returned.
Token *tokenize_string_literal(Token *tok, Type *basety) {
  char *lit = tok->loc;
  Token *t = (basety->size == 2)
                 ? read_utf16_string_literal(lit, lit)
                 : read_utf32_string_literal(lit, lit, basety);
  t->next = tok->next;
  return t;
}
|
||||
|
||||
// Tokenize a given string and returns new tokens.
|
||||
Token *tokenize(File *file) {
|
||||
current_file = file;
|
||||
|
||||
char *p = file->contents;
|
||||
Token head = {};
|
||||
Token *cur = &head;
|
||||
|
||||
at_bol = true;
|
||||
has_space = false;
|
||||
|
||||
while (*p) {
|
||||
// Skip line comments.
|
||||
if (starts_with(p, "//")) {
|
||||
p += 2;
|
||||
while (*p != '\n') p++;
|
||||
has_space = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip block comments.
|
||||
if (starts_with(p, "/*")) {
|
||||
char *q = strstr(p + 2, "*/");
|
||||
if (!q) error_at(p, "unclosed block comment");
|
||||
p = q + 2;
|
||||
has_space = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip newline.
|
||||
if (*p == '\n') {
|
||||
p++;
|
||||
at_bol = true;
|
||||
has_space = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Skip whitespace characters.
|
||||
if (isspace(*p)) {
|
||||
p++;
|
||||
has_space = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Numeric literal
|
||||
if (isdigit(*p) || (*p == '.' && isdigit(p[1]))) {
|
||||
char *q = p++;
|
||||
for (;;) {
|
||||
if (p[0] && p[1] && strchr("eEpP", p[0]) && strchr("+-", p[1]))
|
||||
p += 2;
|
||||
else if (isalnum(*p) || *p == '.')
|
||||
p++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
cur = cur->next = new_token(TK_PP_NUM, q, p);
|
||||
continue;
|
||||
}
|
||||
|
||||
// String literal
|
||||
if (*p == '"') {
|
||||
cur = cur->next = read_string_literal(p, p);
|
||||
p += cur->len;
|
||||
continue;
|
||||
}
|
||||
|
||||
// UTF-8 string literal
|
||||
if (starts_with(p, "u8\"")) {
|
||||
cur = cur->next = read_string_literal(p, p + 2);
|
||||
p += cur->len;
|
||||
continue;
|
||||
}
|
||||
|
||||
// UTF-16 string literal
|
||||
if (starts_with(p, "u\"")) {
|
||||
cur = cur->next = read_utf16_string_literal(p, p + 1);
|
||||
p += cur->len;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Wide string literal
|
||||
if (starts_with(p, "L\"")) {
|
||||
cur = cur->next = read_utf32_string_literal(p, p + 1, ty_int);
|
||||
p += cur->len;
|
||||
continue;
|
||||
}
|
||||
|
||||
// UTF-32 string literal
|
||||
if (starts_with(p, "U\"")) {
|
||||
cur = cur->next = read_utf32_string_literal(p, p + 1, ty_uint);
|
||||
p += cur->len;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Character literal
|
||||
if (*p == '\'') {
|
||||
cur = cur->next = read_char_literal(p, p, ty_int);
|
||||
cur->val = (char)cur->val;
|
||||
p += cur->len;
|
||||
continue;
|
||||
}
|
||||
|
||||
// UTF-16 character literal
|
||||
if (starts_with(p, "u'")) {
|
||||
cur = cur->next = read_char_literal(p, p + 1, ty_ushort);
|
||||
cur->val &= 0xffff;
|
||||
p += cur->len;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Wide character literal
|
||||
if (starts_with(p, "L'")) {
|
||||
cur = cur->next = read_char_literal(p, p + 1, ty_int);
|
||||
p += cur->len;
|
||||
continue;
|
||||
}
|
||||
|
||||
// UTF-32 character literal
|
||||
if (starts_with(p, "U'")) {
|
||||
cur = cur->next = read_char_literal(p, p + 1, ty_uint);
|
||||
p += cur->len;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Identifier or keyword
|
||||
char *q;
|
||||
if ((q = read_ident(p)) != NULL) {
|
||||
cur = cur->next = new_token(TK_IDENT, p, q);
|
||||
p = q;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Three-letter punctuators
|
||||
if (starts_with(p, "<<=") || starts_with(p, ">>=") ||
|
||||
starts_with(p, "...")) {
|
||||
cur = cur->next = new_token(TK_RESERVED, p, p + 3);
|
||||
p += 3;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Two-letter punctuators
|
||||
if (starts_with(p, "==") || starts_with(p, "!=") || starts_with(p, "<=") ||
|
||||
starts_with(p, ">=") || starts_with(p, "->") || starts_with(p, "+=") ||
|
||||
starts_with(p, "-=") || starts_with(p, "*=") || starts_with(p, "/=") ||
|
||||
starts_with(p, "++") || starts_with(p, "--") || starts_with(p, "%=") ||
|
||||
starts_with(p, "&=") || starts_with(p, "|=") || starts_with(p, "^=") ||
|
||||
starts_with(p, "&&") || starts_with(p, "||") || starts_with(p, "<<") ||
|
||||
starts_with(p, ">>") || starts_with(p, "##")) {
|
||||
cur = cur->next = new_token(TK_RESERVED, p, p + 2);
|
||||
p += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Single-letter punctuators
|
||||
if (ispunct(*p)) {
|
||||
cur = cur->next = new_token(TK_RESERVED, p, p + 1);
|
||||
p++;
|
||||
continue;
|
||||
}
|
||||
|
||||
error_at(p, "invalid token");
|
||||
}
|
||||
|
||||
cur = cur->next = new_token(TK_EOF, p, p);
|
||||
add_line_numbers(head.next);
|
||||
return head.next;
|
||||
}
|
||||
|
||||
// Reads the whole file at `path` into a freshly allocated, NUL-terminated
// buffer whose last line always ends with '\n'. By convention, "-" means
// standard input. Returns NULL if the file cannot be opened.
static char *read_file(char *path) {
  FILE *fp = strcmp(path, "-") ? fopen(path, "r") : stdin;
  if (!fp) return NULL;

  int cap = 4096;
  int len = 0;
  char *buf = calloc(1, cap);

  // Read until EOF, doubling the buffer each time it fills up.
  for (;;) {
    int limit = cap - 2;  // keep 2 spare bytes for the trailing "\n\0"
    int got = fread(buf + len, 1, limit - len, fp);
    if (!got) break;
    len += got;
    if (len != limit) continue;
    cap *= 2;
    buf = realloc(buf, cap);
  }

  if (fp != stdin) fclose(fp);

  // Terminate the last logical line with '\n': a trailing backslash is
  // replaced by one, and a missing newline is appended.
  bool empty = (len == 0);
  if (!empty && buf[len - 1] == '\\')
    buf[len - 1] = '\n';
  else if (empty || buf[len - 1] != '\n')
    buf[len++] = '\n';

  buf[len] = '\0';
  return buf;
}
|
||||
|
||||
// Returns the file-scope `input_files` pointer, the list of all input
// files declared at the top of this file.
File **get_input_files(void) {
|
||||
return input_files;
|
||||
}
|
||||
|
||||
File *new_file(char *name, int file_no, char *contents) {
|
||||
File *file = calloc(1, sizeof(File));
|
||||
file->name = name;
|
||||
file->display_name = name;
|
||||
file->file_no = file_no;
|
||||
file->contents = contents;
|
||||
return file;
|
||||
}
|
||||
|
||||
// Rewrites `p` in place so that every "\r\n" pair and every lone '\r'
// becomes a single '\n'.
static void canonicalize_newline(char *p) {
  char *src = p;
  char *dst = p;

  while (*src) {
    if (*src != '\r') {
      *dst++ = *src++;
      continue;
    }
    // Swallow the '\r' and, if present, the '\n' that follows it.
    src += (src[1] == '\n') ? 2 : 1;
    *dst++ = '\n';
  }

  *dst = '\0';
}
|
||||
|
||||
// Removes backslashes followed by a newline.
|
||||
// Splices continued lines in place by deleting every backslash that is
// immediately followed by a newline.
//
// To keep logical line numbers equal to physical ones, the deleted
// newlines are not lost: `owed` counts them, and they are re-emitted
// right after the next real newline.
static void remove_backslash_newline(char *p) {
  char *src = p;
  char *dst = p;
  int owed = 0;

  while (*src) {
    if (src[0] == '\\' && src[1] == '\n') {
      src += 2;
      owed++;
      continue;
    }

    char ch = *src++;
    *dst++ = ch;
    if (ch == '\n') {
      while (owed > 0) {
        *dst++ = '\n';
        owed--;
      }
    }
  }

  *dst = '\0';
}
|
||||
|
||||
// Parses exactly `len` hexadecimal digits starting at `p` and returns
// their value. Returns 0 as soon as a non-hex character is met, so a
// well-formed all-zero sequence cannot be told apart from a failure.
static uint32_t read_universal_char(char *p, int len) {
  uint32_t c = 0;
  for (int i = 0; i < len; i++) {
    // <ctype.h> functions are only defined for unsigned char values or
    // EOF; a plain (possibly negative) char must be converted first.
    if (!isxdigit((unsigned char)p[i])) return 0;
    c = (c << 4) | from_hex(p[i]);
  }
  return c;
}
|
||||
|
||||
// Replace \u or \U escape sequences with corresponding UTF-8 bytes.
|
||||
// Rewrites the string in place, replacing each \uXXXX (4 hex digits)
// and \UXXXXXXXX (8 hex digits) escape with the bytes produced by
// encode_utf8(). An escape that read_universal_char() rejects (it
// returns 0) is left untouched. The output is never longer than the
// input, so the write cursor `q` never overtakes the read cursor `p`.
static void convert_universal_chars(char *p) {
  char *q = p;

  while (*p) {
    int digits = 0;
    if (starts_with(p, "\\u")) {
      digits = 4;
    } else if (starts_with(p, "\\U")) {
      digits = 8;
    }

    if (digits) {
      uint32_t c = read_universal_char(p + 2, digits);
      if (c) {
        p += 2 + digits;
        q += encode_utf8(q, c);
      } else {
        *q++ = *p++;
      }
    } else if (p[0] == '\\') {
      // Copy the backslash together with the character it escapes, so
      // that an escaped backslash followed by 'u' is not mistaken for
      // the start of a universal character name.
      *q++ = *p++;
      // A backslash may be the very last character; do not step over
      // the terminating NUL in that case.
      if (*p) *q++ = *p++;
    } else {
      *q++ = *p++;
    }
  }

  *q = '\0';
}
|
||||
|
||||
Token *tokenize_file(char *path) {
|
||||
char *p = read_file(path);
|
||||
if (!p) return NULL;
|
||||
|
||||
canonicalize_newline(p);
|
||||
remove_backslash_newline(p);
|
||||
convert_universal_chars(p);
|
||||
|
||||
// Save the filename for assembler .file directive.
|
||||
static int file_no;
|
||||
File *file = new_file(path, file_no + 1, p);
|
||||
|
||||
// Save the filename for assembler .file directive.
|
||||
input_files = realloc(input_files, sizeof(char *) * (file_no + 2));
|
||||
input_files[file_no] = file;
|
||||
input_files[file_no + 1] = NULL;
|
||||
file_no++;
|
||||
|
||||
return tokenize(file);
|
||||
}
|
286
third_party/chibicc/type.c
vendored
Normal file
286
third_party/chibicc/type.c
vendored
Normal file
|
@ -0,0 +1,286 @@
|
|||
#include "third_party/chibicc/chibicc.h"
|
||||
|
||||
// Shared descriptors for the built-in types. Each one points at a
// file-scope compound literal.

// void and _Bool
Type *ty_void = &(Type){TY_VOID, 1, 1};
Type *ty_bool = &(Type){TY_BOOL, 1, 1};

// integer types without the trailing `true` flag
Type *ty_char = &(Type){TY_CHAR, 1, 1};
Type *ty_short = &(Type){TY_SHORT, 2, 2};
Type *ty_int = &(Type){TY_INT, 4, 4};
Type *ty_long = &(Type){TY_LONG, 8, 8};

// unsigned counterparts (fourth initializer set to true)
Type *ty_uchar = &(Type){TY_CHAR, 1, 1, true};
Type *ty_ushort = &(Type){TY_SHORT, 2, 2, true};
Type *ty_uint = &(Type){TY_INT, 4, 4, true};
Type *ty_ulong = &(Type){TY_LONG, 8, 8, true};

// floating-point types
Type *ty_float = &(Type){TY_FLOAT, 4, 4};
Type *ty_double = &(Type){TY_DOUBLE, 8, 8};
Type *ty_ldouble = &(Type){TY_LDOUBLE, 16, 16};
|
||||
|
||||
// Allocates a zero-initialized Type and sets its kind, size and
// alignment; every other field stays zero.
static Type *new_type(TypeKind kind, int size, int align) {
  Type *fresh = calloc(1, sizeof(Type));
  fresh->align = align;
  fresh->size = size;
  fresh->kind = kind;
  return fresh;
}
|
||||
|
||||
// True for the kinds treated as integers: _Bool, char, short, int,
// long and enum.
bool is_integer(Type *ty) {
  switch (ty->kind) {
    case TY_BOOL:
    case TY_CHAR:
    case TY_SHORT:
    case TY_INT:
    case TY_LONG:
    case TY_ENUM:
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
// True for float, double and long double.
bool is_flonum(Type *ty) {
  switch (ty->kind) {
    case TY_FLOAT:
    case TY_DOUBLE:
    case TY_LDOUBLE:
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
// True when the type is either an integer or a floating-point type.
bool is_numeric(Type *ty) {
  if (is_integer(ty)) return true;
  return is_flonum(ty);
}
|
||||
|
||||
// Reports whether two types are compatible.
//
// Identical pointers are compatible. A type that records an `origin`
// (set by copy_type) is compared through that origin. Otherwise the
// kinds must match and the comparison is structural: integer kinds
// compare signedness, pointers compare pointees, functions compare
// return type, variadic flag and parameter lists, arrays compare
// element type and length. Kinds not listed in the switch are only
// compatible by identity or through `origin`.
bool is_compatible(Type *t1, Type *t2) {
  if (t1 == t2) return true;

  if (t1->origin) return is_compatible(t1->origin, t2);

  if (t2->origin) return is_compatible(t1, t2->origin);

  if (t1->kind != t2->kind) return false;

  switch (t1->kind) {
    case TY_CHAR:
    case TY_SHORT:
    case TY_INT:
    case TY_LONG:
      return t1->is_unsigned == t2->is_unsigned;
    case TY_FLOAT:
    case TY_DOUBLE:
    case TY_LDOUBLE:
      return true;
    case TY_PTR:
      return is_compatible(t1->base, t2->base);
    case TY_FUNC: {
      if (!is_compatible(t1->return_ty, t2->return_ty)) return false;
      if (t1->is_variadic != t2->is_variadic) return false;

      Type *p1 = t1->params;
      Type *p2 = t2->params;
      for (; p1 && p2; p1 = p1->next, p2 = p2->next)
        if (!is_compatible(p1, p2)) return false;
      // Both lists must end together, i.e. have the same length.
      return p1 == NULL && p2 == NULL;
    }
    case TY_ARRAY:
      if (!is_compatible(t1->base, t2->base)) return false;
      // C11 6.7.6.2p6: the sizes only have to agree when both are
      // present. NOTE(review): a negative array_len is presumed to mark
      // an array of unknown size -- confirm against the parser.
      return t1->array_len < 0 || t2->array_len < 0 ||
             t1->array_len == t2->array_len;
  }
  return false;
}
|
||||
|
||||
// Returns a heap-allocated shallow copy of `ty` whose `origin` field
// points back at `ty`.
Type *copy_type(Type *ty) {
  Type *dup = calloc(1, sizeof(Type));
  *dup = *ty;
  dup->origin = ty;
  return dup;
}
|
||||
|
||||
// Builds a new pointer type (size 8, alignment 8, flagged unsigned)
// whose pointee is `base`.
Type *pointer_to(Type *base) {
  Type *ptr = new_type(TY_PTR, 8, 8);
  ptr->is_unsigned = true;
  ptr->base = base;
  return ptr;
}
|
||||
|
||||
// Builds a new function type returning `return_ty`.
Type *func_type(Type *return_ty) {
  // The C spec disallows sizeof(<function type>), but
  // GCC allows that and the expression is evaluated to 1.
  Type *fn = new_type(TY_FUNC, 1, 1);
  fn->return_ty = return_ty;
  return fn;
}
|
||||
|
||||
// Builds a new array type of `len` elements of `base`. Its size is
// base->size * len and it inherits the element's alignment.
Type *array_of(Type *base, int len) {
  int total = base->size * len;
  Type *arr = new_type(TY_ARRAY, total, base->align);
  arr->array_len = len;
  arr->base = base;
  return arr;
}
|
||||
|
||||
// Builds a new variable-length array type with element type `base`;
// the length is kept as the expression node `len`. The type itself is
// created with size 8 and alignment 8.
Type *vla_of(Type *base, Node *len) {
  Type *vla = new_type(TY_VLA, 8, 8);
  vla->vla_len = len;
  vla->base = base;
  return vla;
}
|
||||
|
||||
Type *enum_type(void) {
|
||||
return new_type(TY_ENUM, 4, 4);
|
||||
}
|
||||
|
||||
Type *struct_type(void) {
|
||||
return new_type(TY_STRUCT, 0, 1);
|
||||
}
|
||||
|
||||
// Picks the type both operands of a binary operator are converted to.
//
// In order: a left operand with a `base` yields a pointer to that
// base; a function operand yields a pointer to the function; then the
// widest floating-point kind present wins; otherwise operands smaller
// than 4 bytes count as int, the larger size wins, and on a tie the
// right operand wins only when it is unsigned.
static Type *get_common_type(Type *ty1, Type *ty2) {
  if (ty1->base) return pointer_to(ty1->base);

  if (ty1->kind == TY_FUNC) return pointer_to(ty1);
  if (ty2->kind == TY_FUNC) return pointer_to(ty2);

  TypeKind k1 = ty1->kind;
  TypeKind k2 = ty2->kind;
  if (k1 == TY_LDOUBLE || k2 == TY_LDOUBLE) return ty_ldouble;
  if (k1 == TY_DOUBLE || k2 == TY_DOUBLE) return ty_double;
  if (k1 == TY_FLOAT || k2 == TY_FLOAT) return ty_float;

  Type *a = (ty1->size < 4) ? ty_int : ty1;
  Type *b = (ty2->size < 4) ? ty_int : ty2;

  if (a->size < b->size) return b;
  if (a->size > b->size) return a;

  return b->is_unsigned ? b : a;
}
|
||||
|
||||
// For many binary operators, we implicitly promote operands so that
|
||||
// both operands have the same type. Any integral type smaller than
|
||||
// int is always promoted to int. If the type of one operand is larger
|
||||
// than the other's (e.g. "long" vs. "int"), the smaller operand will
|
||||
// be promoted to match with the other.
|
||||
//
|
||||
// This operation is called the "usual arithmetic conversion".
|
||||
// Replaces both operands with casts to their common type, as chosen
// by get_common_type().
static void usual_arith_conv(Node **lhs, Node **rhs) {
  Node *left = *lhs;
  Node *right = *rhs;
  Type *common = get_common_type(left->ty, right->ty);
  *lhs = new_cast(left, common);
  *rhs = new_cast(right, common);
}
|
||||
|
||||
// Assigns a type to `node` and, recursively, to its children.
//
// Does nothing for a NULL node or one that already has a type. The
// children (lhs, rhs, cond, then, els, init, inc, body list, args
// list) are typed first; the node's own type is then derived from its
// kind. Node kinds without a case in the switch are left untyped.
// Type errors are reported through error_tok().
void add_type(Node *node) {
  if (!node || node->ty) return;

  add_type(node->lhs);
  add_type(node->rhs);
  add_type(node->cond);
  add_type(node->then);
  add_type(node->els);
  add_type(node->init);
  add_type(node->inc);

  for (Node *n = node->body; n; n = n->next) add_type(n);
  for (Node *n = node->args; n; n = n->next) add_type(n);

  switch (node->kind) {
    case ND_NUM:
      node->ty = ty_int;
      return;
    case ND_ADD:
    case ND_SUB:
    case ND_MUL:
    case ND_DIV:
    case ND_MOD:
    case ND_BITAND:
    case ND_BITOR:
    case ND_BITXOR:
      usual_arith_conv(&node->lhs, &node->rhs);
      node->ty = node->lhs->ty;
      return;
    case ND_NEG: {
      // Negation yields at least int.
      Type *ty = get_common_type(ty_int, node->lhs->ty);
      node->lhs = new_cast(node->lhs, ty);
      node->ty = ty;
      return;
    }
    case ND_ASSIGN:
      if (node->lhs->ty->kind == TY_ARRAY)
        error_tok(node->lhs->tok, "not an lvalue");
      // Struct assignments are left without a cast on the right side.
      if (node->lhs->ty->kind != TY_STRUCT)
        node->rhs = new_cast(node->rhs, node->lhs->ty);
      node->ty = node->lhs->ty;
      return;
    case ND_EQ:
    case ND_NE:
    case ND_LT:
    case ND_LE:
      usual_arith_conv(&node->lhs, &node->rhs);
      node->ty = ty_int;
      return;
    case ND_FUNCALL:
      node->ty = node->func_ty->return_ty;
      return;
    case ND_NOT:
    case ND_LOGOR:
    case ND_LOGAND:
      node->ty = ty_int;
      return;
    case ND_BITNOT:
    case ND_SHL:
    case ND_SHR:
      node->ty = node->lhs->ty;
      return;
    case ND_VAR:
    case ND_VLA_PTR:
      node->ty = node->var->ty;
      return;
    case ND_COND:
      if (node->then->ty->kind == TY_VOID || node->els->ty->kind == TY_VOID) {
        node->ty = ty_void;
      } else {
        usual_arith_conv(&node->then, &node->els);
        node->ty = node->then->ty;
      }
      return;
    case ND_COMMA:
      node->ty = node->rhs->ty;
      return;
    case ND_MEMBER:
      node->ty = node->member->ty;
      return;
    case ND_ADDR: {
      // Taking the address of an array yields a pointer to its element
      // type rather than a pointer to the array.
      Type *ty = node->lhs->ty;
      if (ty->kind == TY_ARRAY)
        node->ty = pointer_to(ty->base);
      else
        node->ty = pointer_to(ty);
      return;
    }
    case ND_DEREF:
      if (!node->lhs->ty->base)
        error_tok(node->tok, "invalid pointer dereference");
      if (node->lhs->ty->base->kind == TY_VOID)
        error_tok(node->tok, "dereferencing a void pointer");

      node->ty = node->lhs->ty->base;
      return;
    case ND_STMT_EXPR:
      // The value of a statement expression is that of its last
      // statement, which must be an expression statement.
      if (node->body) {
        Node *stmt = node->body;
        while (stmt->next) stmt = stmt->next;
        if (stmt->kind == ND_EXPR_STMT) {
          node->ty = stmt->lhs->ty;
          return;
        }
      }
      error_tok(node->tok,
                "statement expression returning void is not supported");
      return;
    case ND_LABEL_VAL:
      node->ty = pointer_to(ty_void);
      return;
    case ND_CAS:
      // The CAS operands are not among the generic children typed
      // above, so type them here.
      add_type(node->cas_addr);
      add_type(node->cas_old);
      add_type(node->cas_new);
      node->ty = ty_bool;

      if (node->cas_addr->ty->kind != TY_PTR)
        error_tok(node->cas_addr->tok, "pointer expected");
      if (node->cas_old->ty->kind != TY_PTR)
        error_tok(node->cas_old->tok, "pointer expected");
      return;
    case ND_EXCH:
      // Report the error at the operand that was actually checked;
      // cas_addr is only populated for ND_CAS in this function.
      if (node->lhs->ty->kind != TY_PTR)
        error_tok(node->lhs->tok, "pointer expected");
      node->ty = node->lhs->ty->base;
      return;
  }
}
|
186
third_party/chibicc/unicode.c
vendored
Normal file
186
third_party/chibicc/unicode.c
vendored
Normal file
|
@ -0,0 +1,186 @@
|
|||
#include "third_party/chibicc/chibicc.h"
|
||||
|
||||
// Encode a given character in UTF-8.
|
||||
// Writes the UTF-8 encoding of code point `c` to `buf` and returns the
// number of bytes written (1 to 4). `buf` is not NUL-terminated.
int encode_utf8(char *buf, uint32_t c) {
  if (c < 0x80) {
    buf[0] = c;
    return 1;
  }

  if (c < 0x800) {
    buf[0] = 0xC0 | (c >> 6);
    buf[1] = 0x80 | (c & 0x3F);
    return 2;
  }

  if (c < 0x10000) {
    buf[0] = 0xE0 | (c >> 12);
    buf[1] = 0x80 | ((c >> 6) & 0x3F);
    buf[2] = 0x80 | (c & 0x3F);
    return 3;
  }

  buf[0] = 0xF0 | (c >> 18);
  buf[1] = 0x80 | ((c >> 12) & 0x3F);
  buf[2] = 0x80 | ((c >> 6) & 0x3F);
  buf[3] = 0x80 | (c & 0x3F);
  return 4;
}
|
||||
|
||||
// Read a UTF-8-encoded Unicode code point from a source file.
|
||||
// We assume that source files are always in UTF-8.
|
||||
//
|
||||
// UTF-8 is a variable-width encoding in which one code point is
|
||||
// encoded in one to four bytes. One byte UTF-8 code points are
|
||||
// identical to ASCII. Non-ASCII characters are encoded using more
|
||||
// than one byte.
|
||||
// Decodes one UTF-8 sequence starting at `p`, stores the address just
// past it in `*new_pos` and returns the code point. A stray
// continuation byte in lead position, or a missing continuation byte,
// is reported through error_at().
uint32_t decode_utf8(char **new_pos, char *p) {
  unsigned char lead = (unsigned char)*p;

  // Single-byte sequences are plain ASCII.
  if (lead < 0x80) {
    *new_pos = p + 1;
    return *p;
  }

  int len;
  uint32_t c;

  // The lead byte determines the sequence length and contributes its
  // low bits to the code point.
  if (lead >= 0xF0) {
    len = 4;
    c = lead & 0x07;
  } else if (lead >= 0xE0) {
    len = 3;
    c = lead & 0x0F;
  } else if (lead >= 0xC0) {
    len = 2;
    c = lead & 0x1F;
  } else {
    error_at(p, "invalid UTF-8 sequence");
  }

  // Every following byte must look like 10xxxxxx and adds six bits.
  for (int i = 1; i < len; i++) {
    unsigned char cont = (unsigned char)p[i];
    if ((cont & 0xC0) != 0x80) error_at(p, "invalid UTF-8 sequence");
    c = (c << 6) | (cont & 0x3F);
  }

  *new_pos = p + len;
  return c;
}
|
||||
|
||||
// Tests whether `c` lies in one of the closed intervals listed in
// `range`. The table is a flat list of (low, high) pairs terminated by
// an element equal to (uint32_t)-1. The table is only read, hence the
// const qualifier.
static bool in_range(const uint32_t *range, uint32_t c) {
  // Compare against the sentinel in the table's own unsigned type
  // rather than relying on an implicit int -> unsigned conversion.
  for (int i = 0; range[i] != (uint32_t)-1; i += 2)
    if (range[i] <= c && c <= range[i + 1]) return true;
  return false;
}
|
||||
|
||||
// C11 allows not only ASCII but some multibyte characters in certain
|
||||
// Unicode ranges to be used in an identifier. See C11 Annex D for the
|
||||
// details.
|
||||
//
|
||||
// This function returns true if a given character is acceptable as
|
||||
// the first character of an identifier.
|
||||
//
|
||||
// For example, ¾ (U+00BE) is a valid identifier because characters in
|
||||
// 0x00BC-0x00BE are allowed, while neither ⟘ (U+27D8) nor ' '
|
||||
// (U+3000, full-width space) are allowed because they are out of range.
|
||||
// Looks `c` up in the table of code point ranges accepted as the first
// character of an identifier: '_', ASCII letters, '$', and a list of
// non-ASCII ranges. The table is a flat list of (low, high) pairs
// ending with -1.
bool is_ident1(uint32_t c) {
  static uint32_t first_char_ranges[] = {
      '_',     '_',     'a',     'z',     'A',     'Z',     '$',     '$',
      0x00A8,  0x00A8,  0x00AA,  0x00AA,  0x00AD,  0x00AD,  0x00AF,  0x00AF,
      0x00B2,  0x00B5,  0x00B7,  0x00BA,  0x00BC,  0x00BE,  0x00C0,  0x00D6,
      0x00D8,  0x00F6,  0x00F8,  0x00FF,  0x0100,  0x02FF,  0x0370,  0x167F,
      0x1681,  0x180D,  0x180F,  0x1DBF,  0x1E00,  0x1FFF,  0x200B,  0x200D,
      0x202A,  0x202E,  0x203F,  0x2040,  0x2054,  0x2054,  0x2060,  0x206F,
      0x2070,  0x20CF,  0x2100,  0x218F,  0x2460,  0x24FF,  0x2776,  0x2793,
      0x2C00,  0x2DFF,  0x2E80,  0x2FFF,  0x3004,  0x3007,  0x3021,  0x302F,
      0x3031,  0x303F,  0x3040,  0xD7FF,  0xF900,  0xFD3D,  0xFD40,  0xFDCF,
      0xFDF0,  0xFE1F,  0xFE30,  0xFE44,  0xFE47,  0xFFFD,  0x10000, 0x1FFFD,
      0x20000, 0x2FFFD, 0x30000, 0x3FFFD, 0x40000, 0x4FFFD, 0x50000, 0x5FFFD,
      0x60000, 0x6FFFD, 0x70000, 0x7FFFD, 0x80000, 0x8FFFD, 0x90000, 0x9FFFD,
      0xA0000, 0xAFFFD, 0xB0000, 0xBFFFD, 0xC0000, 0xCFFFD, 0xD0000, 0xDFFFD,
      0xE0000, 0xEFFFD, -1,
  };

  bool ok = in_range(first_char_ranges, c);
  return ok;
}
|
||||
|
||||
// Returns true if a given character is acceptable as a non-first
|
||||
// character of an identifier.
|
||||
// Accepts everything is_ident1() accepts plus the ranges that are only
// valid after the first character: ASCII digits, '$', and several
// non-ASCII ranges.
bool is_ident2(uint32_t c) {
  static uint32_t extra_ranges[] = {
      '0',    '9',    '$',    '$',    0x0300, 0x036F, 0x1DC0,
      0x1DFF, 0x20D0, 0x20FF, 0xFE20, 0xFE2F, -1,
  };

  if (is_ident1(c)) return true;
  return in_range(extra_ranges, c);
}
|
||||
|
||||
// Returns the number of columns needed to display a given
|
||||
// character in a fixed-width font.
|
||||
//
|
||||
// Based on https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
|
||||
// Classifies `c` by two range tables: code points in the first table
// take 0 columns, those in the second take 2, everything else takes 1.
// The zero-width table is consulted first.
static int char_width(uint32_t c) {
  static uint32_t zero_width[] = {
      0x0000,  0x001F,  0x007f,  0x00a0,  0x0300,  0x036F,  0x0483,  0x0486,
      0x0488,  0x0489,  0x0591,  0x05BD,  0x05BF,  0x05BF,  0x05C1,  0x05C2,
      0x05C4,  0x05C5,  0x05C7,  0x05C7,  0x0600,  0x0603,  0x0610,  0x0615,
      0x064B,  0x065E,  0x0670,  0x0670,  0x06D6,  0x06E4,  0x06E7,  0x06E8,
      0x06EA,  0x06ED,  0x070F,  0x070F,  0x0711,  0x0711,  0x0730,  0x074A,
      0x07A6,  0x07B0,  0x07EB,  0x07F3,  0x0901,  0x0902,  0x093C,  0x093C,
      0x0941,  0x0948,  0x094D,  0x094D,  0x0951,  0x0954,  0x0962,  0x0963,
      0x0981,  0x0981,  0x09BC,  0x09BC,  0x09C1,  0x09C4,  0x09CD,  0x09CD,
      0x09E2,  0x09E3,  0x0A01,  0x0A02,  0x0A3C,  0x0A3C,  0x0A41,  0x0A42,
      0x0A47,  0x0A48,  0x0A4B,  0x0A4D,  0x0A70,  0x0A71,  0x0A81,  0x0A82,
      0x0ABC,  0x0ABC,  0x0AC1,  0x0AC5,  0x0AC7,  0x0AC8,  0x0ACD,  0x0ACD,
      0x0AE2,  0x0AE3,  0x0B01,  0x0B01,  0x0B3C,  0x0B3C,  0x0B3F,  0x0B3F,
      0x0B41,  0x0B43,  0x0B4D,  0x0B4D,  0x0B56,  0x0B56,  0x0B82,  0x0B82,
      0x0BC0,  0x0BC0,  0x0BCD,  0x0BCD,  0x0C3E,  0x0C40,  0x0C46,  0x0C48,
      0x0C4A,  0x0C4D,  0x0C55,  0x0C56,  0x0CBC,  0x0CBC,  0x0CBF,  0x0CBF,
      0x0CC6,  0x0CC6,  0x0CCC,  0x0CCD,  0x0CE2,  0x0CE3,  0x0D41,  0x0D43,
      0x0D4D,  0x0D4D,  0x0DCA,  0x0DCA,  0x0DD2,  0x0DD4,  0x0DD6,  0x0DD6,
      0x0E31,  0x0E31,  0x0E34,  0x0E3A,  0x0E47,  0x0E4E,  0x0EB1,  0x0EB1,
      0x0EB4,  0x0EB9,  0x0EBB,  0x0EBC,  0x0EC8,  0x0ECD,  0x0F18,  0x0F19,
      0x0F35,  0x0F35,  0x0F37,  0x0F37,  0x0F39,  0x0F39,  0x0F71,  0x0F7E,
      0x0F80,  0x0F84,  0x0F86,  0x0F87,  0x0F90,  0x0F97,  0x0F99,  0x0FBC,
      0x0FC6,  0x0FC6,  0x102D,  0x1030,  0x1032,  0x1032,  0x1036,  0x1037,
      0x1039,  0x1039,  0x1058,  0x1059,  0x1160,  0x11FF,  0x135F,  0x135F,
      0x1712,  0x1714,  0x1732,  0x1734,  0x1752,  0x1753,  0x1772,  0x1773,
      0x17B4,  0x17B5,  0x17B7,  0x17BD,  0x17C6,  0x17C6,  0x17C9,  0x17D3,
      0x17DD,  0x17DD,  0x180B,  0x180D,  0x18A9,  0x18A9,  0x1920,  0x1922,
      0x1927,  0x1928,  0x1932,  0x1932,  0x1939,  0x193B,  0x1A17,  0x1A18,
      0x1B00,  0x1B03,  0x1B34,  0x1B34,  0x1B36,  0x1B3A,  0x1B3C,  0x1B3C,
      0x1B42,  0x1B42,  0x1B6B,  0x1B73,  0x1DC0,  0x1DCA,  0x1DFE,  0x1DFF,
      0x200B,  0x200F,  0x202A,  0x202E,  0x2060,  0x2063,  0x206A,  0x206F,
      0x20D0,  0x20EF,  0x302A,  0x302F,  0x3099,  0x309A,  0xA806,  0xA806,
      0xA80B,  0xA80B,  0xA825,  0xA826,  0xFB1E,  0xFB1E,  0xFE00,  0xFE0F,
      0xFE20,  0xFE23,  0xFEFF,  0xFEFF,  0xFFF9,  0xFFFB,  0x10A01, 0x10A03,
      0x10A05, 0x10A06, 0x10A0C, 0x10A0F, 0x10A38, 0x10A3A, 0x10A3F, 0x10A3F,
      0x1D167, 0x1D169, 0x1D173, 0x1D182, 0x1D185, 0x1D18B, 0x1D1AA, 0x1D1AD,
      0x1D242, 0x1D244, 0xE0001, 0xE0001, 0xE0020, 0xE007F, 0xE0100, 0xE01EF,
      -1,
  };

  static uint32_t double_width[] = {
      0x1100,  0x115F,  0x2329,  0x2329,  0x232A,  0x232A,  0x2E80,  0x303E,
      0x3040,  0xA4CF,  0xAC00,  0xD7A3,  0xF900,  0xFAFF,  0xFE10,  0xFE19,
      0xFE30,  0xFE6F,  0xFF00,  0xFF60,  0xFFE0,  0xFFE6,  0x1F000, 0x1F644,
      0x20000, 0x2FFFD, 0x30000, 0x3FFFD, -1,
  };

  if (in_range(zero_width, c)) {
    return 0;
  } else if (in_range(double_width, c)) {
    return 2;
  } else {
    return 1;
  }
}
|
||||
|
||||
// Returns the number of columns needed to display a given
|
||||
// string in a fixed-width font.
|
||||
// Sums char_width() over the code points decoded from the first `len`
// bytes of `p`. Decoding always consumes whole UTF-8 sequences, so the
// last one may extend past `len`.
int str_width(char *p, int len) {
  int total = 0;
  char *cur = p;
  while (cur - p < len) {
    total += char_width(decode_utf8(&cur, cur));
  }
  return total;
}
|
Loading…
Add table
Add a link
Reference in a new issue