Make fixes, improvements, and chibicc python bindings

- python now mixes audio 10x faster
- python octal notation is restored
- chibicc now builds code 3x faster
- chibicc now has help documentation
- chibicc can now generate basic python bindings
- linenoise now supports some paredit-like features

See #141
This commit is contained in:
Justine Tunney 2021-10-08 08:11:51 -07:00
parent 28997f3acb
commit 7061c79c22
121 changed files with 5272 additions and 1928 deletions

View file

@ -871,16 +871,16 @@ static size_t __asan_malloc_usable_size(const void *p) {
static void __asan_deallocate(char *p, long kind) {
size_t c, n;
if ((c = weaken(dlmalloc_usable_size)(p)) >= 8) {
if (__asan_read48(p + c - 8, &n) && n <= c) {
__asan_poison((uintptr_t)p, c, kind);
if (c <= FRAMESIZE) {
p = __asan_morgue_add(p);
}
weaken(dlfree)(p);
} else {
__asan_report_invalid_pointer(p);
if (__asan_is_mapped((intptr_t)p >> 16) &&
(((intptr_t)p >> 16) == ((intptr_t)(p - 16) >> 16) ||
__asan_is_mapped((intptr_t)(p - 16) >> 16)) &&
(c = weaken(dlmalloc_usable_size)(p)) >= 8 &&
__asan_read48(p + c - 8, &n) && n <= c) {
__asan_poison((uintptr_t)p, c, kind);
if (c <= FRAMESIZE) {
p = __asan_morgue_add(p);
}
weaken(dlfree)(p);
} else {
__asan_report_invalid_pointer(p);
}
@ -891,6 +891,17 @@ void __asan_free(void *p) {
__asan_deallocate(p, kAsanHeapFree);
}
/**
 * Frees each non-null pointer stored in an array.
 *
 * Elements are visited in ascending index order. Every non-null entry
 * is handed to __asan_deallocate() with kAsanHeapFree and its slot is
 * then overwritten with zero; null entries are left untouched.
 *
 * @param p is array of pointers to release
 * @param n is number of elements in p
 * @return always 0
 */
size_t __asan_bulk_free(void *p[], size_t n) {
  size_t idx = 0;
  while (idx < n) {
    void **slot = p + idx++;
    if (!*slot) continue; /* nothing to release in this slot */
    __asan_deallocate(*slot, kAsanHeapFree);
    *slot = 0; /* clear the slot once it has been released */
  }
  return 0;
}
/**
 * Forwards an aligned allocation request to __asan_allocate().
 *
 * The call passes kAsanHeapUnderrun and kAsanHeapOverrun as the third
 * and fourth arguments.
 *
 * @param align is forwarded as the first argument
 * @param size is forwarded as the second argument
 * @return whatever __asan_allocate() returns
 */
void *__asan_memalign(size_t align, size_t size) {
  void *res;
  res = __asan_allocate(align, size, kAsanHeapUnderrun, kAsanHeapOverrun);
  return res;
}
@ -1058,6 +1069,7 @@ void __asan_install_malloc_hooks(void) {
HOOK(hook_pvalloc, __asan_pvalloc);
HOOK(hook_realloc, __asan_realloc);
HOOK(hook_memalign, __asan_memalign);
HOOK(hook_bulk_free, __asan_bulk_free);
HOOK(hook_malloc_trim, __asan_malloc_trim);
HOOK(hook_realloc_in_place, __asan_realloc_in_place);
HOOK(hook_malloc_usable_size, __asan_malloc_usable_size);

View file

@ -18,6 +18,7 @@
*/
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/nexgen32e.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
@ -25,8 +26,9 @@
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
typedef long long xmm_a __attribute__((__vector_size__(16), __aligned__(16)));
static noinline antiquity void bzero_sse(char *p, size_t n) {
noasan static noinline antiquity void bzero_sse(char *p, size_t n) {
xmm_t v = {0};
if (IsAsan()) __asan_check(p, n);
if (n <= 32) {
*(xmm_t *)(p + n - 16) = v;
*(xmm_t *)p = v;
@ -41,12 +43,13 @@ static noinline antiquity void bzero_sse(char *p, size_t n) {
}
}
microarchitecture("avx") static void bzero_avx(char *p, size_t n) {
noasan microarchitecture("avx") static void bzero_avx(char *p, size_t n) {
xmm_t v = {0};
if (IsAsan()) __asan_check(p, n);
if (n <= 32) {
*(xmm_t *)(p + n - 16) = v;
*(xmm_t *)p = v;
} else if (!IsAsan() && n >= 1024 && X86_HAVE(ERMS)) {
} else if (n >= 1024 && X86_HAVE(ERMS)) {
asm("rep stosb" : "+D"(p), "+c"(n), "=m"(*(char(*)[n])p) : "a"(0));
} else {
if (n < kHalfCache3 || !kHalfCache3) {
@ -132,6 +135,7 @@ void(bzero)(void *p, size_t n) {
uint64_t x;
b = p;
if (IsTiny()) {
if (IsAsan()) __asan_check(p, n);
asm("rep stosb" : "+D"(b), "+c"(n), "=m"(*(char(*)[n])b) : "0"(p), "a"(0));
return;
}

View file

@ -29,8 +29,8 @@ static noinline antiquity int memcmp_sse(const unsigned char *p,
unsigned u, u0, u1, u2, u3;
if (n > 32) {
while (n > 16 + 16) {
if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
*(const xmm_t *)p, *(const xmm_t *)q)) -
if (!(u = __builtin_ia32_pmovmskb128(*(const xmm_t *)p ==
*(const xmm_t *)q) -
0xffff)) {
n -= 16;
p += 16;
@ -41,11 +41,10 @@ static noinline antiquity int memcmp_sse(const unsigned char *p,
}
}
}
if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
*(const xmm_t *)p, *(const xmm_t *)q)) -
if (!(u = __builtin_ia32_pmovmskb128(*(const xmm_t *)p == *(const xmm_t *)q) -
0xffff)) {
if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
*(const xmm_t *)(p + n - 16), *(const xmm_t *)(q + n - 16))) -
if (!(u = __builtin_ia32_pmovmskb128(*(const xmm_t *)(p + n - 16) ==
*(const xmm_t *)(q + n - 16)) -
0xffff)) {
return 0;
} else {
@ -65,14 +64,14 @@ microarchitecture("avx") static int memcmp_avx(const unsigned char *p,
unsigned u, u0, u1, u2, u3;
if (n > 32) {
while (n >= 16 + 64) {
u0 = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
((const xmm_t *)p)[0], ((const xmm_t *)q)[0]));
u1 = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
((const xmm_t *)p)[1], ((const xmm_t *)q)[1]));
u2 = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
((const xmm_t *)p)[2], ((const xmm_t *)q)[2]));
u3 = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
((const xmm_t *)p)[3], ((const xmm_t *)q)[3]));
u0 = __builtin_ia32_pmovmskb128(
(((const xmm_t *)p)[0] == ((const xmm_t *)q)[0]));
u1 = __builtin_ia32_pmovmskb128(
(((const xmm_t *)p)[1] == ((const xmm_t *)q)[1]));
u2 = __builtin_ia32_pmovmskb128(
(((const xmm_t *)p)[2] == ((const xmm_t *)q)[2]));
u3 = __builtin_ia32_pmovmskb128(
(((const xmm_t *)p)[3] == ((const xmm_t *)q)[3]));
w = (uint64_t)u0 | (uint64_t)u1 << 16 | (uint64_t)u2 << 32 |
(uint64_t)u3 << 48;
if (w == -1) {
@ -85,8 +84,8 @@ microarchitecture("avx") static int memcmp_avx(const unsigned char *p,
}
}
while (n > 16 + 16) {
if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
*(const xmm_t *)p, *(const xmm_t *)q)) -
if (!(u = __builtin_ia32_pmovmskb128(*(const xmm_t *)p ==
*(const xmm_t *)q) -
0xffff)) {
n -= 16;
p += 16;
@ -97,11 +96,10 @@ microarchitecture("avx") static int memcmp_avx(const unsigned char *p,
}
}
}
if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
*(const xmm_t *)p, *(const xmm_t *)q)) -
if (!(u = __builtin_ia32_pmovmskb128(*(const xmm_t *)p == *(const xmm_t *)q) -
0xffff)) {
if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(
*(const xmm_t *)(p + n - 16), *(const xmm_t *)(q + n - 16))) -
if (!(u = __builtin_ia32_pmovmskb128(*(const xmm_t *)(p + n - 16) ==
*(const xmm_t *)(q + n - 16)) -
0xffff)) {
return 0;
} else {
@ -149,7 +147,8 @@ int memcmp(const void *a, const void *b, size_t n) {
uint32_t k, i, j;
uint64_t w, x, y;
const unsigned char *p, *q;
if ((p = a) == (q = b)) return 0;
if ((p = a) == (q = b) || !n) return 0;
if ((c = *p - *q)) return c;
if (!IsTiny()) {
if (n <= 16) {
if (n >= 8) {

View file

@ -89,8 +89,8 @@ asm("memcpy = memmove\n\t"
* @asyncsignalsafe
*/
void *memmove(void *dst, const void *src, size_t n) {
char *d;
size_t i;
char *d, *r;
const char *s;
uint64_t a, b;
xmm_t v, w, x, y, V, W, X, Y, wut;
@ -119,18 +119,22 @@ void *memmove(void *dst, const void *src, size_t n) {
} else if (n) {
*d = *s;
}
} else if (d <= s) {
asm("rep movsb"
: "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])dst)
: "m"(*(char(*)[n])src));
} else {
d += n - 1;
s += n - 1;
asm("std\n\t"
"rep movsb\n\t"
"cld"
: "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])dst)
: "m"(*(char(*)[n])src));
if (IsAsan()) __asan_check(d, n);
if (IsAsan()) __asan_check(s, n);
if (d <= s) {
asm("rep movsb"
: "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])dst)
: "m"(*(char(*)[n])src));
} else {
d += n - 1;
s += n - 1;
asm("std\n\t"
"rep movsb\n\t"
"cld"
: "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])dst)
: "m"(*(char(*)[n])src));
}
}
return dst;
}
@ -208,7 +212,6 @@ void *memmove(void *dst, const void *src, size_t n) {
*(xmm_t *)(d + n - 16) = Y;
return d;
default:
r = d;
if (d == s) return d;
if (n < kHalfCache3 || !kHalfCache3) {
if (d > s) {
@ -221,12 +224,14 @@ void *memmove(void *dst, const void *src, size_t n) {
*(xmm_t *)(d + n + 16) = w;
} while (n >= 32);
} else {
if (IsAsan()) __asan_check(d, n);
if (IsAsan()) __asan_check(s, n);
asm("std\n\t"
"rep movsb\n\t"
"cld"
: "=D"(d), "=S"(s), "+c"(n), "=m"(*(char(*)[n])d)
: "0"(d + n - 1), "1"(s + n - 1), "m"(*(char(*)[n])s));
return r;
return dst;
}
} else {
if (IsAsan() || n < 900 || !X86_HAVE(ERMS)) {
@ -241,10 +246,12 @@ void *memmove(void *dst, const void *src, size_t n) {
s += i;
n -= i;
} else {
if (IsAsan()) __asan_check(d, n);
if (IsAsan()) __asan_check(s, n);
asm("rep movsb"
: "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])d)
: "m"(*(char(*)[n])s));
return r;
return dst;
}
}
} else {
@ -278,54 +285,31 @@ void *memmove(void *dst, const void *src, size_t n) {
}
asm("sfence");
}
switch (n) {
case 0:
return r;
case 17 ... 31:
__builtin_memcpy(&v, s, 16);
__builtin_memcpy(&w, s + n - 16, 16);
__builtin_memcpy(d, &v, 16);
__builtin_memcpy(d + n - 16, &w, 16);
return r;
case 9 ... 15:
if (n) {
if (n >= 16) {
v = *(const xmm_t *)s;
w = *(const xmm_t *)(s + n - 16);
*(xmm_t *)d = v;
*(xmm_t *)(d + n - 16) = w;
} else if (n >= 8) {
__builtin_memcpy(&a, s, 8);
__builtin_memcpy(&b, s + n - 8, 8);
__builtin_memcpy(d, &a, 8);
__builtin_memcpy(d + n - 8, &b, 8);
return r;
case 5 ... 7:
} else if (n >= 4) {
__builtin_memcpy(&a, s, 4);
__builtin_memcpy(&b, s + n - 4, 4);
__builtin_memcpy(d, &a, 4);
__builtin_memcpy(d + n - 4, &b, 4);
return r;
case 16:
__builtin_memcpy(&v, s, 16);
__builtin_memcpy(d, &v, 16);
return r;
case 8:
__builtin_memcpy(&a, s, 8);
__builtin_memcpy(d, &a, 8);
return r;
case 4:
__builtin_memcpy(&a, s, 4);
__builtin_memcpy(d, &a, 4);
return r;
case 1:
} else if (n >= 2) {
__builtin_memcpy(&a, s, 2);
__builtin_memcpy(&b, s + n - 2, 2);
__builtin_memcpy(d, &a, 2);
__builtin_memcpy(d + n - 2, &b, 2);
} else {
*d = *s;
return r;
case 2:
__builtin_memcpy(&a, s, 2);
__builtin_memcpy(d, &a, 2);
return r;
case 3:
__builtin_memcpy(&a, s, 2);
__builtin_memcpy(&b, s + 1, 2);
__builtin_memcpy(d, &a, 2);
__builtin_memcpy(d + 1, &b, 2);
return r;
default:
unreachable;
}
}
return dst;
}
}

View file

@ -18,6 +18,7 @@
*/
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/nexgen32e.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
@ -25,8 +26,9 @@
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
typedef long long xmm_a __attribute__((__vector_size__(16), __aligned__(16)));
static noinline antiquity void *memset_sse(char *p, char c, size_t n) {
noasan static noinline antiquity void *memset_sse(char *p, char c, size_t n) {
xmm_t v = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
if (IsAsan()) __asan_check(p, n);
if (n <= 32) {
*(xmm_t *)(p + n - 16) = v;
*(xmm_t *)p = v;
@ -42,13 +44,15 @@ static noinline antiquity void *memset_sse(char *p, char c, size_t n) {
return p;
}
microarchitecture("avx") static void *memset_avx(char *p, char c, size_t n) {
noasan microarchitecture("avx") static void *memset_avx(char *p, char c,
size_t n) {
char *t;
xmm_t v = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c};
if (IsAsan()) __asan_check(p, n);
if (n <= 32) {
*(xmm_t *)(p + n - 16) = v;
*(xmm_t *)p = v;
} else if (!IsAsan() && n >= 1024 && X86_HAVE(ERMS)) {
} else if (n >= 1024 && X86_HAVE(ERMS)) {
asm("rep stosb" : "=D"(t), "+c"(n), "=m"(*(char(*)[n])p) : "0"(p), "a"(c));
} else {
if (n < kHalfCache3 || !kHalfCache3) {
@ -137,6 +141,7 @@ void *memset(void *p, int c, size_t n) {
uint64_t x;
b = p;
if (IsTiny()) {
if (IsAsan()) __asan_check(p, n);
asm("rep stosb" : "+D"(b), "+c"(n), "=m"(*(char(*)[n])b) : "0"(p), "a"(c));
return p;
}

View file

@ -35,8 +35,8 @@ noasan size_t strlen(const char *s) {
unsigned m, k = (uintptr_t)s & 15;
const xmm_t *p = (const xmm_t *)((uintptr_t)s & -16);
if (IsAsan()) __asan_verify(s, 1);
m = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(*p, z)) >> k << k;
while (!m) m = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(*++p, z));
m = __builtin_ia32_pmovmskb128(*p == z) >> k << k;
while (!m) m = __builtin_ia32_pmovmskb128(*++p == z);
n = (const char *)p + __builtin_ctzl(m) - s;
if (IsAsan()) __asan_verify(s, n);
return n;