Make fixes, improvements, and chibicc python bindings

- python now mixes audio 10x faster
- python octal notation is restored
- chibicc now builds code 3x faster
- chibicc now has help documentation
- chibicc can now generate basic python bindings
- linenoise now supports some paredit-like features

See #141
This commit is contained in:
Justine Tunney 2021-10-08 08:11:51 -07:00
parent 28997f3acb
commit 7061c79c22
121 changed files with 5272 additions and 1928 deletions

View file

@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
#include "libc/bits/popcnt.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/stat.h"
#include "libc/elf/def.h"
@ -26,6 +27,7 @@
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/nexgen32e/bsr.h"
#include "libc/nexgen32e/crc32.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
@ -35,93 +37,6 @@
#include "third_party/gdtoa/gdtoa.h"
#include "tool/build/lib/elfwriter.h"
/**
* @fileoverview Assembler
*
* This program turns assembly into relocatable NexGen32e ELF objects.
* That process is normally an implementation detail of your compiler,
* which can embed this program or launch it as a subprocess. Much GNU
* style syntax is supported. Your code that gets embedded in an asm()
* statement will ultimately end up here. This implementation, has the
* advantage of behaving the same across platforms, in a simple single
* file implementation that compiles down to a 100kilo ape executable.
*
* Your assembler supports the following flags:
*
* -o FILE output path [default: a.out]
* -I DIR append include path [default: .]
* -W inhibit .warning
* -Z inhibit .error and .err
*
* Your assembler supports the following directives:
*
* .zero INT... emits int8
* .word INT... emits int16
* .long INT... emits int32
* .quad INT... emits int64
* .ascii STR... emits string
* .asciz STR... emits string and 0 byte
* .ident STR emits string to .comment section
* .float NUMBER... emits binary32
* .double NUMBER... emits binary64
* .float80 NUMBER... emits x86 float (10 bytes)
* .ldbl NUMBER... emits x86 float (16 bytes)
* .sleb128 NUMBER... emits LEB-128 signed varint
* .uleb128 NUMBER... emits LEB-128 unsigned varint
* .align BYTES [FILL [MAXSKIP]] emits fill bytes to boundary
* .end halts tokenization
* .abort crashes assembler
* .err aborts (ignorable w/ -Z)
* .error STR aborts (ignorable w/ -Z)
* .warning STR whines (ignorable w/ -W)
* .text enters text section (default)
* .data enters data section
* .bss enters bss section
* .section NAME [SFLG SHT] enters section
* .previous enters previous section
* .pushsection NAME [SFLG SHT] pushes section
* .popsection pops section
* .type SYM TYPE sets type of symbol
* .size SYM EXPR sets size of symbol
* .internal SYM... marks symbol STV_INTERNAL
* .hidden SYM... marks symbol STV_HIDDEN
* .protected SYM... marks symbol STV_PROTECTED
* .globl SYM... marks symbol STB_GLOBAL
* .local SYM... marks symbol STB_LOCAL
* .weak SYM... marks symbol STB_WEAK
* .include FILE assembles file source
* .incbin FILE emits file content
* .file FILENO PATH dwarf file define
* .loc FILENO LINENO dwarf source line
*
* TYPE can be one of the following:
*
* - @notype STT_NOTYPE (default)
* - @object STT_OBJECT
* - @function STT_FUNC
* - @common STT_COMMON
* - @tls_object STT_TLS
*
* SHT can be one of the following:
*
* - @progbits SHT_PROGBITS
* - @note SHT_NOTE
* - @nobits SHT_NOBITS
* - @preinit_array SHT_PREINIT_ARRAY
* - @init_array SHT_INIT_ARRAY
* - @fini_array SHT_FINI_ARRAY
*
* SFLG is a string which may have the following characters:
*
* - a SHF_ALLOC
* - w SHF_WRITE
* - x SHF_EXECINSTR
* - g SHF_GROUP
* - M SHF_MERGE
* - S SHF_STRINGS
* - T SHF_TLS
*/
#define OSZ 0x66
#define ASZ 0x67
#define REX 0x40 // byte
@ -144,7 +59,7 @@
#define IS(P, N, S) (N == sizeof(S) - 1 && !strncasecmp(P, S, sizeof(S) - 1))
#define MAX(X, Y) ((Y) < (X) ? (X) : (Y))
#define READ128BE(S) ((unsigned __int128)READ64BE(S) << 64 | READ64BE((S) + 8))
#define READ128BE(S) ((uint128_t)READ64BE(S) << 64 | READ64BE((S) + 8))
struct As {
int i; // things
@ -158,7 +73,7 @@ struct As {
bool inhibitwarn;
struct Ints {
unsigned long n, c;
long *p;
int128_t *p;
} ints;
struct Floats {
unsigned long n, c;
@ -276,7 +191,7 @@ struct As {
unsigned tok;
int lhs;
int rhs;
long x;
int128_t x;
bool isvisited;
bool isevaluated;
} * p;
@ -431,14 +346,8 @@ static const struct Reg {
{"xmm9", 1 | 4<<3 | REXR<<8, 1 | 4<<3 | REXB<<8, -1, -1 },
} /* clang-format on */;
static unsigned Hash(const void *p, unsigned long n) {
unsigned h, i;
for (h = i = 0; i < n; i++) {
h += ((unsigned char *)p)[i];
h *= 0x9e3779b1;
}
return MAX(1, h);
}
long as_hashmap_hits;
long as_hashmap_miss;
static bool IsPunctMergeable(int c) {
switch (c) {
@ -586,6 +495,7 @@ static void ReadFlags(struct As *a, int argc, char *argv[]) {
}
static int ReadCharLiteral(struct Slice *buf, int c, char *p, int *i) {
int x;
if (c != '\\') return c;
switch ((c = p[(*i)++])) {
case 'a':
@ -605,10 +515,10 @@ static int ReadCharLiteral(struct Slice *buf, int c, char *p, int *i) {
case 'e':
return 033;
case 'x':
if (isxdigit(p[*i])) {
c = hextoint(p[(*i)++]);
if (isxdigit(p[*i])) {
c = c * 16 + hextoint(p[(*i)++]);
if ((x = kHexToInt[p[*i] & 255]) != -1) {
*i += 1, c = x;
if ((x = kHexToInt[p[*i] & 255]) != -1) {
*i += 1, c = c << 4 | x;
}
}
return c;
@ -670,6 +580,10 @@ static void Tokenize(struct As *a, int path) {
char *p, *path2;
struct Slice buf;
bool bol, isfloat, isfpu;
if (!fileexists(a->strings.p[path])) {
fprintf(stderr, "%s: file not found\n", a->strings.p[path]);
exit(1);
}
p = SaveString(&a->strings, read_file(a->strings.p[path]));
p = skip_bom(p);
canonicalize_newline(p);
@ -779,7 +693,7 @@ static void Tokenize(struct As *a, int path) {
a->things.p[a->things.n - 1].t = TT_FLOAT;
} else {
APPEND(a->ints);
a->ints.p[a->ints.n - 1] = strtoul(p, NULL, 0);
a->ints.p[a->ints.n - 1] = strtoumax(p, NULL, 0);
a->things.p[a->things.n - 1].i = a->ints.n - 1;
if (p[i] == 'f' || p[i] == 'F') {
a->things.p[a->things.n - 1].t = TT_FORWARD;
@ -859,22 +773,28 @@ static void Tokenize(struct As *a, int path) {
static int GetSymbol(struct As *a, int name) {
struct HashEntry *p;
unsigned i, j, k, n, m, h, n2;
h = Hash(a->slices.p[name].p, a->slices.p[name].n);
if (!(h = crc32c(0, a->slices.p[name].p, a->slices.p[name].n))) h = 1;
n = a->symbolindex.n;
i = 0;
if (n) {
k = 0;
do {
for (;;) {
i = (h + k + ((k + 1) >> 1)) & (n - 1);
if (a->symbolindex.p[i].h == h &&
a->slices.p[a->symbols.p[a->symbolindex.p[i].i].name].n ==
a->slices.p[name].n &&
!memcmp(a->slices.p[a->symbols.p[a->symbolindex.p[i].i].name].p,
a->slices.p[name].p, a->slices.p[name].n)) {
++as_hashmap_hits;
return a->symbolindex.p[i].i;
}
++k;
} while (a->symbolindex.p[i].h);
if (!a->symbolindex.p[i].h) {
break;
} else {
++k;
++as_hashmap_miss;
}
}
}
if (++a->symbolindex.i >= (n >> 1)) {
m = n ? n << 1 : 16;
@ -983,7 +903,7 @@ static void ConsumeComma(struct As *a) {
ConsumePunct(a, ',');
}
static int NewPrimary(struct As *a, enum ExprKind k, long x) {
static int NewPrimary(struct As *a, enum ExprKind k, int128_t x) {
AppendExpr(a);
a->exprs.p[a->exprs.n - 1].kind = k;
a->exprs.p[a->exprs.n - 1].x = x;
@ -1299,7 +1219,7 @@ static int Parse(struct As *a) {
return ParseOr(a, &a->i, a->i);
}
static long GetInt(struct As *a) {
static int128_t GetInt(struct As *a) {
int x;
x = Parse(a);
if (a->exprs.p[x].kind == EX_INT) {
@ -1331,7 +1251,7 @@ static struct Slice GetSlice(struct As *a) {
}
}
static void EmitData(struct As *a, const void *p, unsigned long n) {
static void EmitData(struct As *a, const void *p, uint128_t n) {
struct Slice *s;
s = &a->sections.p[a->section].binary;
s->p = realloc(s->p, s->n + n);
@ -1339,41 +1259,50 @@ static void EmitData(struct As *a, const void *p, unsigned long n) {
s->n += n;
}
static void EmitByte(struct As *a, unsigned long x) {
static void EmitByte(struct As *a, uint128_t i) {
uint8_t x = i;
unsigned char b[1];
b[0] = x >> 000;
b[0] = (x & 0xff) >> 000;
EmitData(a, b, 1);
}
static void EmitWord(struct As *a, unsigned long x) {
static void EmitWord(struct As *a, uint128_t i) {
uint16_t x = i;
unsigned char b[2];
b[0] = x >> 000;
b[1] = x >> 010;
b[0] = (x & 0x00ff) >> 000;
b[1] = (x & 0xff00) >> 010;
EmitData(a, b, 2);
}
static void EmitLong(struct As *a, unsigned long x) {
static void EmitLong(struct As *a, uint128_t i) {
uint32_t x = i;
unsigned char b[4];
b[0] = x >> 000;
b[1] = x >> 010;
b[2] = x >> 020;
b[3] = x >> 030;
b[0] = (x & 0x000000ff) >> 000;
b[1] = (x & 0x0000ff00) >> 010;
b[2] = (x & 0x00ff0000) >> 020;
b[3] = (x & 0xff000000) >> 030;
EmitData(a, b, 4);
}
void EmitQuad(struct As *a, unsigned long x) {
void EmitQuad(struct As *a, uint128_t i) {
uint64_t x = i;
unsigned char b[8];
b[0] = x >> 000;
b[1] = x >> 010;
b[2] = x >> 020;
b[3] = x >> 030;
b[4] = x >> 040;
b[5] = x >> 050;
b[6] = x >> 060;
b[7] = x >> 070;
b[0] = (x & 0x00000000000000ff) >> 000;
b[1] = (x & 0x000000000000ff00) >> 010;
b[2] = (x & 0x0000000000ff0000) >> 020;
b[3] = (x & 0x00000000ff000000) >> 030;
b[4] = (x & 0x000000ff00000000) >> 040;
b[5] = (x & 0x0000ff0000000000) >> 050;
b[6] = (x & 0x00ff000000000000) >> 060;
b[7] = (x & 0xff00000000000000) >> 070;
EmitData(a, b, 8);
}
void EmitOcta(struct As *a, uint128_t i) {
EmitQuad(a, i);
EmitQuad(a, i >> 64);
}
static void EmitVarword(struct As *a, unsigned long x) {
if (x > 255) EmitVarword(a, x >> 8);
EmitByte(a, x);
@ -1381,7 +1310,7 @@ static void EmitVarword(struct As *a, unsigned long x) {
static void OnSleb128(struct As *a, struct Slice s) {
int c;
long x;
int128_t x;
for (;;) {
x = GetInt(a);
for (;;) {
@ -1401,7 +1330,7 @@ static void OnSleb128(struct As *a, struct Slice s) {
static void OnUleb128(struct As *a, struct Slice s) {
int c;
unsigned long x;
uint128_t x;
for (;;) {
x = GetInt(a);
do {
@ -1415,6 +1344,23 @@ static void OnUleb128(struct As *a, struct Slice s) {
}
}
static void OnZleb128(struct As *a, struct Slice s) {
int c;
uint128_t x;
for (;;) {
x = GetInt(a);
x = (x << 1) ^ ((int128_t)x >> 127);
do {
c = x & 0x7f;
x >>= 7;
if (x) c |= 0x80;
EmitByte(a, c);
} while (x);
if (IsSemicolon(a)) break;
ConsumeComma(a);
}
}
static void OnZero(struct As *a, struct Slice s) {
long n;
char *p;
@ -1460,7 +1406,7 @@ static long GetRelaAddend(int kind) {
}
static void EmitExpr(struct As *a, int expr, int kind,
void emitter(struct As *, unsigned long)) {
void emitter(struct As *, uint128_t)) {
if (expr == -1) {
emitter(a, 0);
} else if (a->exprs.p[expr].kind == EX_INT) {
@ -1477,7 +1423,7 @@ static void EmitExpr(struct As *a, int expr, int kind,
}
static void OpInt(struct As *a, int kind,
void emitter(struct As *, unsigned long)) {
void emitter(struct As *, uint128_t)) {
for (;;) {
EmitExpr(a, Parse(a), kind, emitter);
if (IsSemicolon(a)) break;
@ -1501,6 +1447,10 @@ static void OnQuad(struct As *a, struct Slice s) {
OpInt(a, R_X86_64_64, EmitQuad);
}
static void OnOcta(struct As *a, struct Slice s) {
OpInt(a, R_X86_64_64, EmitOcta);
}
static void OnFloat(struct As *a, struct Slice s) {
float f;
char b[4];
@ -1620,7 +1570,7 @@ static void OnPrevious(struct As *a, struct Slice s) {
static void OnAlign(struct As *a, struct Slice s) {
long i, n, align, fill, maxskip;
align = GetInt(a);
if (__builtin_popcountl(align) != 1) Fail(a, "alignment not power of 2");
if (!IS2POW(align)) Fail(a, "alignment not power of 2");
fill = (a->sections.p[a->section].flags & SHF_EXECINSTR) ? 0x90 : 0;
maxskip = 268435456;
if (IsComma(a)) {
@ -1910,7 +1860,7 @@ static unsigned long MakeKey64(const char *p, int n) {
return READ64BE(k);
}
static unsigned __int128 MakeKey128(const char *p, int n) {
static uint128_t MakeKey128(const char *p, int n) {
char k[16] = {0};
CopyLower(k, p, n);
return READ128BE(k);
@ -2088,7 +2038,7 @@ static void EmitImm(struct As *a, int reg, int imm) {
static void EmitModrm(struct As *a, int reg, int modrm, int disp) {
int relo, mod, rm;
void (*emitter)(struct As *, unsigned long);
void (*emitter)(struct As *, uint128_t);
reg &= 7;
reg <<= 3;
if (modrm & ISREG) {
@ -2717,12 +2667,14 @@ static noinline void OpJmpImpl(struct As *a, int cc) {
if (IsPunct(a, a->i, '*')) ++a->i;
modrm = RemoveRexw(ParseModrm(a, &disp));
if (cc == -1) {
if ((modrm & ISRIP) || !(modrm & (HASBASE | HASINDEX))) {
modrm |= ISRIP;
a->pcrelative = R_X86_64_GOTPCRELX;
if (modrm & (ISREG | ISRIP | HASINDEX | HASBASE)) {
if (modrm & ISRIP) a->pcrelative = R_X86_64_GOTPCRELX;
EmitRexOpModrm(a, 0xFF, 4, modrm, disp, 0);
a->pcrelative = 0;
} else {
EmitByte(a, 0xE9);
EmitExpr(a, disp, R_X86_64_PC32, EmitLong);
}
EmitRexOpModrm(a, 0xFF, 4, modrm, disp, 0);
a->pcrelative = 0;
} else {
EmitByte(a, 0x0F);
EmitByte(a, 0x80 + cc);
@ -3141,6 +3093,7 @@ static const struct Directive8 {
{".loc", OnLoc}, //
{".local", OnLocal}, //
{".long", OnLong}, //
{".octa", OnOcta}, //
{".quad", OnQuad}, //
{".section", OnSection}, //
{".short", OnWord}, //
@ -3154,6 +3107,7 @@ static const struct Directive8 {
{".weak", OnWeak}, //
{".word", OnWord}, //
{".zero", OnZero}, //
{".zleb128", OnZleb128}, //
{"adc", OnAdc}, //
{"adcb", OnAdc}, //
{"adcl", OnAdc}, //
@ -4024,7 +3978,7 @@ static void PrintThings(struct As *a) {
a->sauces.p[a->things.p[i].s].line);
switch (a->things.p[i].t) {
case TT_INT:
printf("TT_INT %ld\n", a->ints.p[a->things.p[i].i]);
printf("TT_INT %jd\n", a->ints.p[a->things.p[i].i]);
break;
case TT_FLOAT:
g_xfmt_p(fbuf, &a->floats.p[a->things.p[i].i], 19, sizeof(fbuf), 0);
@ -4038,10 +3992,10 @@ static void PrintThings(struct As *a) {
printf("TT_PUNCT %s\n", PunctToStr(a->things.p[i].i, pbuf));
break;
case TT_BACKWARD:
printf("TT_BACKWARD %d\n", a->ints.p[a->things.p[i].i]);
printf("TT_BACKWARD %jd\n", a->ints.p[a->things.p[i].i]);
break;
case TT_FORWARD:
printf("TT_FORWARD %d\n", a->ints.p[a->things.p[i].i]);
printf("TT_FORWARD %jd\n", a->ints.p[a->things.p[i].i]);
break;
default:
abort();