From 7061c79c22cf31769e2480cfe86045b597ad6206 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Fri, 8 Oct 2021 08:11:51 -0700 Subject: [PATCH] Make fixes, improvements, and chibicc python bindings - python now mixes audio 10x faster - python octal notation is restored - chibicc now builds code 3x faster - chibicc now has help documentation - chibicc can now generate basic python bindings - linenoise now supports some paredit-like features See #141 --- Makefile | 2 +- ape/loader.c | 5 +- build/config.mk | 28 +- examples/ttyinfo.c | 2 + libc/alg/replacestr.c | 3 +- libc/bits/roundup2pow.c | 2 +- libc/fmt/formatint32.c | 55 ++ libc/fmt/itoa.h | 2 + libc/intrin/asan.c | 30 +- libc/intrin/bzero.c | 10 +- libc/intrin/memcmp.c | 41 +- libc/intrin/memmove.c | 92 +- libc/intrin/memset.c | 11 +- libc/intrin/strlen.c | 4 +- libc/log/backtrace2.c | 2 + libc/macros.internal.h | 1 + libc/mem/arena.c | 238 +++++ libc/mem/arena.h | 11 + libc/mem/bulk_free.S | 30 + libc/mem/hook/bulk_free.S | 31 + libc/mem/hook/hook.internal.h | 1 + libc/nt/efi.h | 7 +- libc/rand/rngset.c | 6 +- libc/runtime/atexit.c | 2 +- libc/runtime/cxaatexit.c | 2 +- libc/runtime/hook.greg.c | 16 +- libc/runtime/memtrack.internal.h | 2 +- libc/runtime/symbols.internal.h | 2 +- libc/str/djbsort.c | 45 +- libc/str/iswseparator.c | 10 +- {net/http => libc/str}/khextoint.c | 2 +- libc/{runtime => str}/longsort.c | 11 +- libc/str/memccpy.c | 34 +- libc/str/memchr.c | 69 +- libc/{runtime => str}/qsort.c | 0 libc/str/rawmemchr.c | 60 +- libc/str/str.h | 1 + libc/str/strchr.c | 72 +- libc/str/strchrnul.c | 70 +- libc/str/strcmp.c | 2 + libc/str/strpbrk.c | 15 +- libc/str/strstr.c | 6 +- libc/str/timingsafe_bcmp.c | 25 +- libc/str/towlower.c | 10 +- libc/str/towupper.c | 10 +- libc/testlib/testmain.c | 2 +- libc/zipos/get.c | 43 +- net/http/escape.h | 1 - test/libc/fmt/atoi_test.c | 1 + test/libc/intrin/memmove_test.c | 4 +- test/libc/intrin/memset_test.c | 21 +- test/libc/mem/arena_test.c | 65 ++ 
test/libc/mem/malloc_test.c | 23 + test/libc/mem/test.mk | 3 +- test/libc/nexgen32e/strsak32_test.c | 16 + test/libc/sock/poll_test.c | 1 + test/libc/str/blake2_test.c | 19 - test/libc/str/crc32c_test.c | 38 +- test/libc/str/longsort_test.c | 51 ++ test/libc/{runtime => str}/qsort_test.c | 30 - test/libc/str/strchr_test.c | 22 +- test/libc/str/strpbrk_test.c | 36 + third_party/chibicc/README.cosmo | 2 + third_party/chibicc/as.c | 242 ++--- third_party/chibicc/asm.c | 1 + third_party/chibicc/chibicc.c | 128 ++- third_party/chibicc/chibicc.h | 19 +- third_party/chibicc/chibicc.mk | 4 + third_party/chibicc/codegen.c | 45 +- third_party/chibicc/file.c | 143 ++- third_party/chibicc/hashmap.c | 24 +- third_party/chibicc/help.txt | 651 +++++++++++++ third_party/chibicc/kw.c | 33 + third_party/chibicc/kw.gperf | 120 +++ third_party/chibicc/kw.h | 116 +++ third_party/chibicc/kw.inc | 395 ++++++++ third_party/chibicc/parse.c | 854 ++++++++++-------- third_party/chibicc/preprocess.c | 276 +++--- third_party/chibicc/printast.c | 2 + third_party/chibicc/pybind.c | 547 +++++++++++ third_party/chibicc/strarray.c | 11 +- third_party/chibicc/tokenize.c | 470 +++++----- third_party/chibicc/unicode.c | 6 +- third_party/dlmalloc/README.cosmo | 30 +- third_party/dlmalloc/bulk_free.c | 12 +- third_party/dlmalloc/dlcalloc.c | 4 +- third_party/dlmalloc/dlindependent_calloc.c | 2 +- third_party/dlmalloc/dlmalloc.c | 8 +- third_party/dlmalloc/dlmalloc.internal.h | 4 +- third_party/dlmalloc/dlmalloc_stats.c | 2 +- third_party/linenoise/linenoise.c | 328 ++++++- third_party/python/Include/ceval.h | 52 +- third_party/python/Include/pyctype.h | 62 +- third_party/python/Lib/test/test_class.py | 1 + third_party/python/Lib/test/test_codecs.py | 4 +- third_party/python/Lib/test/test_compile.py | 8 +- third_party/python/Lib/test/test_fileio.py | 2 +- third_party/python/Lib/test/test_plistlib.py | 1 + third_party/python/Lib/test/test_scratch.py | 4 +- .../python/Modules/_decimal/_decimal.c | 1 - 
.../Modules/_decimal/libmpdec/umodarith.h | 1 - third_party/python/Modules/_hashmbedtls.c | 2 - third_party/python/Modules/_testcapimodule.c | 4 +- third_party/python/Modules/audioop.c | 29 +- third_party/python/Modules/tlsmodule.c | 1 - third_party/python/Objects/bytesobject.c | 1 + .../python/Objects/unicodeobject-deadcode.c | 431 +++++++++ third_party/python/Objects/unicodeobject.c | 468 +--------- third_party/python/Parser/tokenizer.c | 24 +- third_party/python/Python/cosmomodule.c | 3 +- third_party/python/Python/getargs.c | 2 +- third_party/python/Python/modsupport.c | 2 +- third_party/python/Python/mystrtoul.c | 14 +- third_party/python/Python/recursive.c | 32 + third_party/python/README.cosmo | 2 + third_party/python/chibicc.inc | 3 + third_party/python/pyobj.c | 1 - third_party/python/python.mk | 100 ++ third_party/stb/stb_image.c | 2 +- tool/build/runit.h | 6 +- tool/viz/printimage.c | 2 + 121 files changed, 5272 insertions(+), 1928 deletions(-) create mode 100644 libc/fmt/formatint32.c create mode 100644 libc/mem/arena.c create mode 100644 libc/mem/arena.h create mode 100644 libc/mem/bulk_free.S create mode 100644 libc/mem/hook/bulk_free.S rename {net/http => libc/str}/khextoint.c (99%) rename libc/{runtime => str}/longsort.c (94%) rename libc/{runtime => str}/qsort.c (100%) create mode 100644 test/libc/mem/arena_test.c create mode 100644 test/libc/str/longsort_test.c rename test/libc/{runtime => str}/qsort_test.c (84%) create mode 100644 test/libc/str/strpbrk_test.c create mode 100644 third_party/chibicc/help.txt create mode 100644 third_party/chibicc/kw.c create mode 100644 third_party/chibicc/kw.gperf create mode 100644 third_party/chibicc/kw.h create mode 100644 third_party/chibicc/kw.inc create mode 100644 third_party/chibicc/pybind.c create mode 100644 third_party/python/Objects/unicodeobject-deadcode.c create mode 100644 third_party/python/Python/recursive.c create mode 100644 third_party/python/chibicc.inc diff --git a/Makefile b/Makefile index 
4978c9358..b6e93dea2 100644 --- a/Makefile +++ b/Makefile @@ -145,9 +145,9 @@ include third_party/quickjs/quickjs.mk include third_party/lz4cli/lz4cli.mk include third_party/infozip/infozip.mk include tool/build/lib/buildlib.mk -include third_party/python/python.mk include third_party/chibicc/chibicc.mk include third_party/chibicc/test/test.mk +include third_party/python/python.mk include tool/build/emucrt/emucrt.mk include tool/build/emubin/emubin.mk include tool/build/build.mk diff --git a/ape/loader.c b/ape/loader.c index 95462e042..783bec715 100644 --- a/ape/loader.c +++ b/ape/loader.c @@ -215,7 +215,7 @@ static void Spawn(int os, int fd, long *sp, char *b, struct Elf64_Ehdr *e) { return; } prot = 0; - flags = MAP_FIXED; + flags = MAP_FIXED | MAP_PRIVATE; if (p[i].p_flags & PF_R) { prot |= PROT_READ; } @@ -229,13 +229,12 @@ static void Spawn(int os, int fd, long *sp, char *b, struct Elf64_Ehdr *e) { } if (p[i].p_memsz > p[i].p_filesz) { if (Mmap(os, p[i].p_vaddr + p[i].p_filesz, p[i].p_memsz - p[i].p_filesz, - prot, flags | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) < 0) { + prot, flags | MAP_ANONYMOUS, -1, 0) < 0) { Log(os, "bss mmap failed\n"); return; } } if (p[i].p_filesz) { - flags |= prot & PROT_WRITE ? 
MAP_PRIVATE : MAP_SHARED; if (Mmap(os, p[i].p_vaddr, p[i].p_filesz, prot, flags, fd, p[i].p_offset) < 0) { Log(os, "image mmap failed\n"); diff --git a/build/config.mk b/build/config.mk index 06a51f68d..c037be511 100644 --- a/build/config.mk +++ b/build/config.mk @@ -148,20 +148,24 @@ endif # - YOLO ifeq ($(MODE), tiny) -CONFIG_CPPFLAGS += \ - -DTINY \ - -DNDEBUG \ +CONFIG_CPPFLAGS += \ + -DTINY \ + -DNDEBUG \ -DTRUSTWORTHY -CONFIG_CCFLAGS += \ - -Os \ - -fno-align-functions \ - -fno-align-jumps \ - -fno-align-labels \ - -fno-align-loops -TARGET_ARCH ?= \ +CONFIG_CCFLAGS += \ + -Os \ + -fno-align-functions \ + -fno-align-jumps \ + -fno-align-labels \ + -fno-align-loops \ + -fschedule-insns2 \ + -fomit-frame-pointer \ + -momit-leaf-frame-pointer \ + -foptimize-sibling-calls +TARGET_ARCH ?= \ -msse3 -PYFLAGS += \ - -O2 \ +PYFLAGS += \ + -O2 \ -B endif diff --git a/examples/ttyinfo.c b/examples/ttyinfo.c index 1b9740db5..f6a2e3341 100644 --- a/examples/ttyinfo.c +++ b/examples/ttyinfo.c @@ -27,6 +27,7 @@ #include "libc/x/x.h" #define CTRL(C) ((C) ^ 0b01000000) +#define ENABLE_SAFE_PASTE "\e[?2004h" #define ENABLE_MOUSE_TRACKING "\e[?1000;1002;1015;1006h" #define DISABLE_MOUSE_TRACKING "\e[?1000;1002;1015;1006l" #define PROBE_DISPLAY_SIZE "\e7\e[9979;9979H\e[6n\e8" @@ -71,6 +72,7 @@ int rawmode(void) { t.c_cflag |= CS8; t.c_iflag |= IUTF8; ioctl(1, TCSETS, &t); + write(1, ENABLE_SAFE_PASTE, strlen(ENABLE_SAFE_PASTE)); write(1, ENABLE_MOUSE_TRACKING, strlen(ENABLE_MOUSE_TRACKING)); write(1, PROBE_DISPLAY_SIZE, strlen(PROBE_DISPLAY_SIZE)); return 0; diff --git a/libc/alg/replacestr.c b/libc/alg/replacestr.c index ea4febe2f..82e0eb031 100644 --- a/libc/alg/replacestr.c +++ b/libc/alg/replacestr.c @@ -19,6 +19,7 @@ #include "libc/alg/alg.h" #include "libc/alg/arraylist2.internal.h" #include "libc/bits/safemacros.internal.h" +#include "libc/macros.internal.h" #include "libc/str/str.h" #include "libc/sysv/errfuns.h" @@ -38,7 +39,7 @@ char *replacestr(const char *s, const 
char *needle, const char *replacement) { nlen = strlen(needle); rlen = strlen(replacement); res_i = 0; - res_n = max(left, 32); + res_n = MAX(left, 32); if ((res_p = malloc(res_n * sizeof(char)))) { do { if (!(p2 = memmem(p1, left, needle, nlen))) break; diff --git a/libc/bits/roundup2pow.c b/libc/bits/roundup2pow.c index 3f73b0fa2..916c26476 100644 --- a/libc/bits/roundup2pow.c +++ b/libc/bits/roundup2pow.c @@ -26,5 +26,5 @@ * @see rounddown2pow() */ unsigned long roundup2pow(unsigned long x) { - return x > 1 ? 1ul << (bsrl(x - 1) + 1) : x ? 1 : 0; + return x > 1 ? 2ul << bsrl(x - 1) : x ? 1 : 0; } diff --git a/libc/fmt/formatint32.c b/libc/fmt/formatint32.c new file mode 100644 index 000000000..a4f5052ed --- /dev/null +++ b/libc/fmt/formatint32.c @@ -0,0 +1,55 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/fmt/itoa.h" + +/** + * Converts unsigned 32-bit integer to string. 
+ * + * @param p needs at least 12 bytes + * @return pointer to nul byte + */ +noinline char *FormatUint32(char p[static 12], uint32_t x) { + char t; + size_t i, a, b; + i = 0; + do { + p[i++] = x % 10 + '0'; + x = x / 10; + } while (x > 0); + p[i] = '\0'; + if (i) { + for (a = 0, b = i - 1; a < b; ++a, --b) { + t = p[a]; + p[a] = p[b]; + p[b] = t; + } + } + return p + i; +} + +/** + * Converts signed 32-bit integer to string. + * + * @param p needs at least 12 bytes + * @return pointer to nul byte + */ +char *FormatInt32(char p[static 12], int32_t x) { + if (x < 0) *p++ = '-', x = -(uint32_t)x; + return FormatUint32(p, x); +} diff --git a/libc/fmt/itoa.h b/libc/fmt/itoa.h index 8c83921c3..3fda5a959 100644 --- a/libc/fmt/itoa.h +++ b/libc/fmt/itoa.h @@ -16,6 +16,8 @@ COSMOPOLITAN_C_START_ - uint128toarray_radix10(0x31337, a) l: 93 (27ns) m: 141 (41ns) - int128toarray_radix10(0x31337, a) l: 96 (28ns) m: 173 (51ns) */ +char *FormatInt32(char[hasatleast 12], int32_t); +char *FormatUint32(char[hasatleast 12], uint32_t); char *FormatInt64(char[hasatleast 21], int64_t); char *FormatUint64(char[hasatleast 21], uint64_t); char *FormatInt64Thousands(char[hasatleast 27], int64_t); diff --git a/libc/intrin/asan.c b/libc/intrin/asan.c index f3aee2725..a538d6b3c 100644 --- a/libc/intrin/asan.c +++ b/libc/intrin/asan.c @@ -871,16 +871,16 @@ static size_t __asan_malloc_usable_size(const void *p) { static void __asan_deallocate(char *p, long kind) { size_t c, n; - if ((c = weaken(dlmalloc_usable_size)(p)) >= 8) { - if (__asan_read48(p + c - 8, &n) && n <= c) { - __asan_poison((uintptr_t)p, c, kind); - if (c <= FRAMESIZE) { - p = __asan_morgue_add(p); - } - weaken(dlfree)(p); - } else { - __asan_report_invalid_pointer(p); + if (__asan_is_mapped((intptr_t)p >> 16) && + (((intptr_t)p >> 16) == ((intptr_t)(p - 16) >> 16) || + __asan_is_mapped((intptr_t)(p - 16) >> 16)) && + (c = weaken(dlmalloc_usable_size)(p)) >= 8 && + __asan_read48(p + c - 8, &n) && n <= c) { + 
__asan_poison((uintptr_t)p, c, kind); + if (c <= FRAMESIZE) { + p = __asan_morgue_add(p); } + weaken(dlfree)(p); } else { __asan_report_invalid_pointer(p); } @@ -891,6 +891,17 @@ void __asan_free(void *p) { __asan_deallocate(p, kAsanHeapFree); } +size_t __asan_bulk_free(void *p[], size_t n) { + size_t i; + for (i = 0; i < n; ++i) { + if (p[i]) { + __asan_deallocate(p[i], kAsanHeapFree); + p[i] = 0; + } + } + return 0; +} + void *__asan_memalign(size_t align, size_t size) { return __asan_allocate(align, size, kAsanHeapUnderrun, kAsanHeapOverrun); } @@ -1058,6 +1069,7 @@ void __asan_install_malloc_hooks(void) { HOOK(hook_pvalloc, __asan_pvalloc); HOOK(hook_realloc, __asan_realloc); HOOK(hook_memalign, __asan_memalign); + HOOK(hook_bulk_free, __asan_bulk_free); HOOK(hook_malloc_trim, __asan_malloc_trim); HOOK(hook_realloc_in_place, __asan_realloc_in_place); HOOK(hook_malloc_usable_size, __asan_malloc_usable_size); diff --git a/libc/intrin/bzero.c b/libc/intrin/bzero.c index 5720bd37a..1ff810d76 100644 --- a/libc/intrin/bzero.c +++ b/libc/intrin/bzero.c @@ -18,6 +18,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" #include "libc/dce.h" +#include "libc/intrin/asan.internal.h" #include "libc/nexgen32e/nexgen32e.h" #include "libc/nexgen32e/x86feature.h" #include "libc/str/str.h" @@ -25,8 +26,9 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1))); typedef long long xmm_a __attribute__((__vector_size__(16), __aligned__(16))); -static noinline antiquity void bzero_sse(char *p, size_t n) { +noasan static noinline antiquity void bzero_sse(char *p, size_t n) { xmm_t v = {0}; + if (IsAsan()) __asan_check(p, n); if (n <= 32) { *(xmm_t *)(p + n - 16) = v; *(xmm_t *)p = v; @@ -41,12 +43,13 @@ static noinline antiquity void bzero_sse(char *p, size_t n) { } } -microarchitecture("avx") static void bzero_avx(char *p, size_t n) { +noasan microarchitecture("avx") static void bzero_avx(char *p, size_t n) { 
xmm_t v = {0}; + if (IsAsan()) __asan_check(p, n); if (n <= 32) { *(xmm_t *)(p + n - 16) = v; *(xmm_t *)p = v; - } else if (!IsAsan() && n >= 1024 && X86_HAVE(ERMS)) { + } else if (n >= 1024 && X86_HAVE(ERMS)) { asm("rep stosb" : "+D"(p), "+c"(n), "=m"(*(char(*)[n])p) : "a"(0)); } else { if (n < kHalfCache3 || !kHalfCache3) { @@ -132,6 +135,7 @@ void(bzero)(void *p, size_t n) { uint64_t x; b = p; if (IsTiny()) { + if (IsAsan()) __asan_check(p, n); asm("rep stosb" : "+D"(b), "+c"(n), "=m"(*(char(*)[n])b) : "0"(p), "a"(0)); return; } diff --git a/libc/intrin/memcmp.c b/libc/intrin/memcmp.c index 827283e23..d96ba38a2 100644 --- a/libc/intrin/memcmp.c +++ b/libc/intrin/memcmp.c @@ -29,8 +29,8 @@ static noinline antiquity int memcmp_sse(const unsigned char *p, unsigned u, u0, u1, u2, u3; if (n > 32) { while (n > 16 + 16) { - if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128( - *(const xmm_t *)p, *(const xmm_t *)q)) - + if (!(u = __builtin_ia32_pmovmskb128(*(const xmm_t *)p == + *(const xmm_t *)q) - 0xffff)) { n -= 16; p += 16; @@ -41,11 +41,10 @@ static noinline antiquity int memcmp_sse(const unsigned char *p, } } } - if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128( - *(const xmm_t *)p, *(const xmm_t *)q)) - + if (!(u = __builtin_ia32_pmovmskb128(*(const xmm_t *)p == *(const xmm_t *)q) - 0xffff)) { - if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128( - *(const xmm_t *)(p + n - 16), *(const xmm_t *)(q + n - 16))) - + if (!(u = __builtin_ia32_pmovmskb128(*(const xmm_t *)(p + n - 16) == + *(const xmm_t *)(q + n - 16)) - 0xffff)) { return 0; } else { @@ -65,14 +64,14 @@ microarchitecture("avx") static int memcmp_avx(const unsigned char *p, unsigned u, u0, u1, u2, u3; if (n > 32) { while (n >= 16 + 64) { - u0 = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128( - ((const xmm_t *)p)[0], ((const xmm_t *)q)[0])); - u1 = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128( - ((const xmm_t *)p)[1], ((const xmm_t *)q)[1])); - u2 = 
__builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128( - ((const xmm_t *)p)[2], ((const xmm_t *)q)[2])); - u3 = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128( - ((const xmm_t *)p)[3], ((const xmm_t *)q)[3])); + u0 = __builtin_ia32_pmovmskb128( + (((const xmm_t *)p)[0] == ((const xmm_t *)q)[0])); + u1 = __builtin_ia32_pmovmskb128( + (((const xmm_t *)p)[1] == ((const xmm_t *)q)[1])); + u2 = __builtin_ia32_pmovmskb128( + (((const xmm_t *)p)[2] == ((const xmm_t *)q)[2])); + u3 = __builtin_ia32_pmovmskb128( + (((const xmm_t *)p)[3] == ((const xmm_t *)q)[3])); w = (uint64_t)u0 | (uint64_t)u1 << 16 | (uint64_t)u2 << 32 | (uint64_t)u3 << 48; if (w == -1) { @@ -85,8 +84,8 @@ microarchitecture("avx") static int memcmp_avx(const unsigned char *p, } } while (n > 16 + 16) { - if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128( - *(const xmm_t *)p, *(const xmm_t *)q)) - + if (!(u = __builtin_ia32_pmovmskb128(*(const xmm_t *)p == + *(const xmm_t *)q) - 0xffff)) { n -= 16; p += 16; @@ -97,11 +96,10 @@ microarchitecture("avx") static int memcmp_avx(const unsigned char *p, } } } - if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128( - *(const xmm_t *)p, *(const xmm_t *)q)) - + if (!(u = __builtin_ia32_pmovmskb128(*(const xmm_t *)p == *(const xmm_t *)q) - 0xffff)) { - if (!(u = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128( - *(const xmm_t *)(p + n - 16), *(const xmm_t *)(q + n - 16))) - + if (!(u = __builtin_ia32_pmovmskb128(*(const xmm_t *)(p + n - 16) == + *(const xmm_t *)(q + n - 16)) - 0xffff)) { return 0; } else { @@ -149,7 +147,8 @@ int memcmp(const void *a, const void *b, size_t n) { uint32_t k, i, j; uint64_t w, x, y; const unsigned char *p, *q; - if ((p = a) == (q = b)) return 0; + if ((p = a) == (q = b) || !n) return 0; + if ((c = *p - *q)) return c; if (!IsTiny()) { if (n <= 16) { if (n >= 8) { diff --git a/libc/intrin/memmove.c b/libc/intrin/memmove.c index a004ad1de..131248ed9 100644 --- a/libc/intrin/memmove.c +++ 
b/libc/intrin/memmove.c @@ -89,8 +89,8 @@ asm("memcpy = memmove\n\t" * @asyncsignalsafe */ void *memmove(void *dst, const void *src, size_t n) { + char *d; size_t i; - char *d, *r; const char *s; uint64_t a, b; xmm_t v, w, x, y, V, W, X, Y, wut; @@ -119,18 +119,22 @@ void *memmove(void *dst, const void *src, size_t n) { } else if (n) { *d = *s; } - } else if (d <= s) { - asm("rep movsb" - : "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])dst) - : "m"(*(char(*)[n])src)); } else { - d += n - 1; - s += n - 1; - asm("std\n\t" - "rep movsb\n\t" - "cld" - : "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])dst) - : "m"(*(char(*)[n])src)); + if (IsAsan()) __asan_check(d, n); + if (IsAsan()) __asan_check(s, n); + if (d <= s) { + asm("rep movsb" + : "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])dst) + : "m"(*(char(*)[n])src)); + } else { + d += n - 1; + s += n - 1; + asm("std\n\t" + "rep movsb\n\t" + "cld" + : "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])dst) + : "m"(*(char(*)[n])src)); + } } return dst; } @@ -208,7 +212,6 @@ void *memmove(void *dst, const void *src, size_t n) { *(xmm_t *)(d + n - 16) = Y; return d; default: - r = d; if (d == s) return d; if (n < kHalfCache3 || !kHalfCache3) { if (d > s) { @@ -221,12 +224,14 @@ void *memmove(void *dst, const void *src, size_t n) { *(xmm_t *)(d + n + 16) = w; } while (n >= 32); } else { + if (IsAsan()) __asan_check(d, n); + if (IsAsan()) __asan_check(s, n); asm("std\n\t" "rep movsb\n\t" "cld" : "=D"(d), "=S"(s), "+c"(n), "=m"(*(char(*)[n])d) : "0"(d + n - 1), "1"(s + n - 1), "m"(*(char(*)[n])s)); - return r; + return dst; } } else { if (IsAsan() || n < 900 || !X86_HAVE(ERMS)) { @@ -241,10 +246,12 @@ void *memmove(void *dst, const void *src, size_t n) { s += i; n -= i; } else { + if (IsAsan()) __asan_check(d, n); + if (IsAsan()) __asan_check(s, n); asm("rep movsb" : "+D"(d), "+S"(s), "+c"(n), "=m"(*(char(*)[n])d) : "m"(*(char(*)[n])s)); - return r; + return dst; } } } else { @@ -278,54 +285,31 @@ void *memmove(void *dst, const void 
*src, size_t n) { } asm("sfence"); } - switch (n) { - case 0: - return r; - case 17 ... 31: - __builtin_memcpy(&v, s, 16); - __builtin_memcpy(&w, s + n - 16, 16); - __builtin_memcpy(d, &v, 16); - __builtin_memcpy(d + n - 16, &w, 16); - return r; - case 9 ... 15: + if (n) { + if (n >= 16) { + v = *(const xmm_t *)s; + w = *(const xmm_t *)(s + n - 16); + *(xmm_t *)d = v; + *(xmm_t *)(d + n - 16) = w; + } else if (n >= 8) { __builtin_memcpy(&a, s, 8); __builtin_memcpy(&b, s + n - 8, 8); __builtin_memcpy(d, &a, 8); __builtin_memcpy(d + n - 8, &b, 8); - return r; - case 5 ... 7: + } else if (n >= 4) { __builtin_memcpy(&a, s, 4); __builtin_memcpy(&b, s + n - 4, 4); __builtin_memcpy(d, &a, 4); __builtin_memcpy(d + n - 4, &b, 4); - return r; - case 16: - __builtin_memcpy(&v, s, 16); - __builtin_memcpy(d, &v, 16); - return r; - case 8: - __builtin_memcpy(&a, s, 8); - __builtin_memcpy(d, &a, 8); - return r; - case 4: - __builtin_memcpy(&a, s, 4); - __builtin_memcpy(d, &a, 4); - return r; - case 1: + } else if (n >= 2) { + __builtin_memcpy(&a, s, 2); + __builtin_memcpy(&b, s + n - 2, 2); + __builtin_memcpy(d, &a, 2); + __builtin_memcpy(d + n - 2, &b, 2); + } else { *d = *s; - return r; - case 2: - __builtin_memcpy(&a, s, 2); - __builtin_memcpy(d, &a, 2); - return r; - case 3: - __builtin_memcpy(&a, s, 2); - __builtin_memcpy(&b, s + 1, 2); - __builtin_memcpy(d, &a, 2); - __builtin_memcpy(d + 1, &b, 2); - return r; - default: - unreachable; + } } + return dst; } } diff --git a/libc/intrin/memset.c b/libc/intrin/memset.c index a342ffda8..a7110fe67 100644 --- a/libc/intrin/memset.c +++ b/libc/intrin/memset.c @@ -18,6 +18,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" #include "libc/dce.h" +#include "libc/intrin/asan.internal.h" #include "libc/nexgen32e/nexgen32e.h" #include "libc/nexgen32e/x86feature.h" #include "libc/str/str.h" @@ -25,8 +26,9 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1))); 
typedef long long xmm_a __attribute__((__vector_size__(16), __aligned__(16))); -static noinline antiquity void *memset_sse(char *p, char c, size_t n) { +noasan static noinline antiquity void *memset_sse(char *p, char c, size_t n) { xmm_t v = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c}; + if (IsAsan()) __asan_check(p, n); if (n <= 32) { *(xmm_t *)(p + n - 16) = v; *(xmm_t *)p = v; @@ -42,13 +44,15 @@ static noinline antiquity void *memset_sse(char *p, char c, size_t n) { return p; } -microarchitecture("avx") static void *memset_avx(char *p, char c, size_t n) { +noasan microarchitecture("avx") static void *memset_avx(char *p, char c, + size_t n) { char *t; xmm_t v = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c}; + if (IsAsan()) __asan_check(p, n); if (n <= 32) { *(xmm_t *)(p + n - 16) = v; *(xmm_t *)p = v; - } else if (!IsAsan() && n >= 1024 && X86_HAVE(ERMS)) { + } else if (n >= 1024 && X86_HAVE(ERMS)) { asm("rep stosb" : "=D"(t), "+c"(n), "=m"(*(char(*)[n])p) : "0"(p), "a"(c)); } else { if (n < kHalfCache3 || !kHalfCache3) { @@ -137,6 +141,7 @@ void *memset(void *p, int c, size_t n) { uint64_t x; b = p; if (IsTiny()) { + if (IsAsan()) __asan_check(p, n); asm("rep stosb" : "+D"(b), "+c"(n), "=m"(*(char(*)[n])b) : "0"(p), "a"(c)); return p; } diff --git a/libc/intrin/strlen.c b/libc/intrin/strlen.c index e32fd4cb9..1f09be783 100644 --- a/libc/intrin/strlen.c +++ b/libc/intrin/strlen.c @@ -35,8 +35,8 @@ noasan size_t strlen(const char *s) { unsigned m, k = (uintptr_t)s & 15; const xmm_t *p = (const xmm_t *)((uintptr_t)s & -16); if (IsAsan()) __asan_verify(s, 1); - m = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(*p, z)) >> k << k; - while (!m) m = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(*++p, z)); + m = __builtin_ia32_pmovmskb128(*p == z) >> k << k; + while (!m) m = __builtin_ia32_pmovmskb128(*++p == z); n = (const char *)p + __builtin_ctzl(m) - s; if (IsAsan()) __asan_verify(s, n); return n; diff --git a/libc/log/backtrace2.c 
b/libc/log/backtrace2.c index f522c5f2d..336337252 100644 --- a/libc/log/backtrace2.c +++ b/libc/log/backtrace2.c @@ -111,6 +111,7 @@ static noasan int PrintBacktraceUsingAddr2line(int fd, * * Then it's unpleasant to need to press C-x C-n six times. */ +#if 0 while ((p2 = memchr(p1, '\n', p3 - p1))) { if (memmem(p1, p2 - p1, ": __asan_", 9) || memmem(p1, p2 - p1, ": __die", 7)) { @@ -121,6 +122,7 @@ static noasan int PrintBacktraceUsingAddr2line(int fd, break; } } +#endif /* * remove racist output from gnu tooling, that can't be disabled diff --git a/libc/macros.internal.h b/libc/macros.internal.h index 5bbff6e92..14ef1624e 100644 --- a/libc/macros.internal.h +++ b/libc/macros.internal.h @@ -15,6 +15,7 @@ #define alignas(x) _Alignas(x) +#define IS2POW(X) (!((X) & ((X)-1))) #define ROUNDUP(X, K) (((X) + (K)-1) & -(K)) #define ROUNDDOWN(X, K) ((X) & -(K)) #define ABS(X) ((X) >= 0 ? (X) : -(X)) diff --git a/libc/mem/arena.c b/libc/mem/arena.c new file mode 100644 index 000000000..0564ad82f --- /dev/null +++ b/libc/mem/arena.c @@ -0,0 +1,238 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" +#include "libc/bits/likely.h" +#include "libc/bits/weaken.h" +#include "libc/calls/calls.h" +#include "libc/dce.h" +#include "libc/limits.h" +#include "libc/log/libfatal.internal.h" +#include "libc/log/log.h" +#include "libc/macros.internal.h" +#include "libc/mem/arena.h" +#include "libc/mem/hook/hook.internal.h" +#include "libc/runtime/runtime.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/prot.h" + +#define BASE ((char *)0x30000000) +#define LIMIT ((char *)0x50000000) + +#define EXCHANGE(HOOK, SLOT) \ + __arena_hook((intptr_t *)weaken(HOOK), (intptr_t *)(&(SLOT))) + +static struct Arena { + bool once; + uint8_t depth; + unsigned size; + unsigned offset[16]; + void (*free)(void *); + void *(*malloc)(size_t); + void *(*calloc)(size_t, size_t); + void *(*memalign)(size_t, size_t); + void *(*realloc)(void *, size_t); + void *(*realloc_in_place)(void *, size_t); + void *(*valloc)(size_t); + void *(*pvalloc)(size_t); + int (*malloc_trim)(size_t); + size_t (*malloc_usable_size)(const void *); + size_t (*bulk_free)(void *[], size_t); +} __arena; + +static wontreturn void __arena_die(void) { + if (weaken(__die)) weaken(__die)(); + _exit(83); +} + +static wontreturn void __arena_not_implemented(void) { + __printf("not implemented"); + __arena_die(); +} + +static void __arena_free(void *p) { + if (!p) return; + assert(__arena.depth); + assert((intptr_t)BASE + __arena.offset[__arena.depth - 1] <= (intptr_t)p && + (intptr_t)p < (intptr_t)BASE + __arena.offset[__arena.depth]); +} + +static size_t 
__arena_bulk_free(void *p[], size_t n) { + size_t i; + for (i = 0; i < n; ++i) { + if (p[i]) __arena_free(p[i]); + } + bzero(p, n * sizeof(void *)); + return 0; +} + +static void *__arena_malloc(size_t n) { + char *ptr; + size_t need, greed; + assert(__arena.depth); + if (!n) n = 1; + if (n < LIMIT - BASE) { + need = __arena.offset[__arena.depth] + n; + need = ROUNDUP(need, __BIGGEST_ALIGNMENT__); + if (UNLIKELY(need > __arena.size)) { + greed = __arena.size + 1; + do { + greed += greed >> 1; + greed = ROUNDUP(greed, FRAMESIZE); + } while (need > greed); + if (greed < LIMIT - BASE && + mmap(BASE + __arena.size, greed - __arena.size, + PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, + -1, 0) != MAP_FAILED) { + __arena.size = greed; + } else { + return 0; + } + } + ptr = BASE + __arena.offset[__arena.depth]; + __arena.offset[__arena.depth] = need; + return ptr; + } else { + return 0; + } +} + +static void *__arena_calloc(size_t n, size_t z) { + if (__builtin_mul_overflow(n, z, &n)) n = -1; + return __arena_malloc(n); +} + +static void *__arena_memalign(size_t a, size_t n) { + if (a <= __BIGGEST_ALIGNMENT__) { + return __arena_malloc(n); + } else { + __arena_not_implemented(); + } +} + +static void *__arena_realloc(void *p, size_t n) { + if (p) { + if (n) { + __arena_not_implemented(); + } else { + __arena_free(p); + return 0; + } + } else { + return __arena_malloc(n); + } +} + +static int __arena_malloc_trim(size_t n) { + return 0; +} + +static void *__arena_realloc_in_place(void *p, size_t n) { + __arena_not_implemented(); +} + +static void *__arena_valloc(size_t n) { + __arena_not_implemented(); +} + +static void *__arena_pvalloc(size_t n) { + __arena_not_implemented(); +} + +static size_t __arena_malloc_usable_size(const void *p) { + __arena_not_implemented(); +} + +static void __arena_hook(intptr_t *h, intptr_t *f) { + intptr_t t; + if (h) { + t = *h; + *h = *f; + *f = t; + } +} + +static void __arena_install(void) { + EXCHANGE(hook_free, 
__arena.free); + EXCHANGE(hook_realloc, __arena.realloc); + EXCHANGE(hook_realloc, __arena.realloc); + EXCHANGE(hook_malloc, __arena.malloc); + EXCHANGE(hook_calloc, __arena.calloc); + EXCHANGE(hook_memalign, __arena.memalign); + EXCHANGE(hook_realloc_in_place, __arena.realloc_in_place); + EXCHANGE(hook_valloc, __arena.valloc); + EXCHANGE(hook_pvalloc, __arena.pvalloc); + EXCHANGE(hook_malloc_trim, __arena.malloc_trim); + EXCHANGE(hook_malloc_usable_size, __arena.malloc_usable_size); + EXCHANGE(hook_bulk_free, __arena.bulk_free); +} + +static void __arena_destroy(void) { + if (__arena.depth) { + __arena_install(); + } + if (__arena.size) { + munmap(BASE, __arena.size); + } + bzero(&__arena, sizeof(__arena)); +} + +static void __arena_init(void) { + __arena.free = __arena_free; + __arena.realloc = __arena_realloc; + __arena.realloc = __arena_realloc; + __arena.malloc = __arena_malloc; + __arena.calloc = __arena_calloc; + __arena.memalign = __arena_memalign; + __arena.realloc_in_place = __arena_realloc_in_place; + __arena.valloc = __arena_valloc; + __arena.pvalloc = __arena_pvalloc; + __arena.malloc_trim = __arena_malloc_trim; + __arena.malloc_usable_size = __arena_malloc_usable_size; + __arena.bulk_free = __arena_bulk_free; + atexit(__arena_destroy); +} + +void __arena_push(void) { + if (UNLIKELY(!__arena.once)) { + __arena_init(); + __arena.once = true; + } + if (!__arena.depth) { + __arena_install(); + } else if (__arena.depth == ARRAYLEN(__arena.offset) - 1) { + __printf("too many arenas"); + __arena_die(); + } + __arena.offset[__arena.depth + 1] = __arena.offset[__arena.depth]; + ++__arena.depth; +} + +void __arena_pop(void) { + unsigned greed; + assert(__arena.depth); + bzero(BASE + __arena.offset[__arena.depth - 1], + __arena.offset[__arena.depth] - __arena.offset[__arena.depth - 1]); + if (!--__arena.depth) __arena_install(); + greed = __arena.offset[__arena.depth]; + greed += FRAMESIZE; + greed <<= 1; + if (__arena.size > greed) { + munmap(BASE + greed, 
__arena.size - greed); + } +} diff --git a/libc/mem/arena.h b/libc/mem/arena.h new file mode 100644 index 000000000..404fecfe4 --- /dev/null +++ b/libc/mem/arena.h @@ -0,0 +1,11 @@ +#ifndef COSMOPOLITAN_LIBC_MEM_ARENA_H_ +#define COSMOPOLITAN_LIBC_MEM_ARENA_H_ +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +void __arena_push(void); +void __arena_pop(void); + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_LIBC_MEM_ARENA_H_ */ diff --git a/libc/mem/bulk_free.S b/libc/mem/bulk_free.S new file mode 100644 index 000000000..2516b1380 --- /dev/null +++ b/libc/mem/bulk_free.S @@ -0,0 +1,30 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.internal.h" + +// Frees and clears (sets to NULL) each non-null pointer in given array. +// +// This is twice as fast as freeing them one-by-one. 
If footers are +// used, pointers that have been allocated in different mspaces are +// not freed or cleared, and the count of all such pointers is returned. +// For large arrays of pointers with poor locality, it may be worthwhile +// to sort this array before calling bulk_free. +bulk_free: + jmp *hook_bulk_free(%rip) + .endfn bulk_free,globl diff --git a/libc/mem/hook/bulk_free.S b/libc/mem/hook/bulk_free.S new file mode 100644 index 000000000..b42e6149f --- /dev/null +++ b/libc/mem/hook/bulk_free.S @@ -0,0 +1,31 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.internal.h" + + .initbss 202,_init_bulk_free +hook_bulk_free: + .quad 0 + .endobj hook_bulk_free,globl,hidden + .previous + + .init.start 202,_init_bulk_free + .hidden dlbulk_free + ezlea dlbulk_free,ax + stosq + .init.end 202,_init_bulk_free diff --git a/libc/mem/hook/hook.internal.h b/libc/mem/hook/hook.internal.h index 54b5284db..b890435d4 100644 --- a/libc/mem/hook/hook.internal.h +++ b/libc/mem/hook/hook.internal.h @@ -13,6 +13,7 @@ extern void *(*hook_valloc)(size_t); extern void *(*hook_pvalloc)(size_t); extern int (*hook_malloc_trim)(size_t); extern size_t (*hook_malloc_usable_size)(const void *); +extern size_t (*hook_bulk_free)(void *[], size_t); COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/nt/efi.h b/libc/nt/efi.h index 89b1c240e..6ac501950 100644 --- a/libc/nt/efi.h +++ b/libc/nt/efi.h @@ -101,7 +101,12 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ -#define EFIAPI __attribute__((__ms_abi__)) +#if defined(__GNUC__) && __GNUC__ >= 6 && !defined(__chibicc__) +#define EFIAPI __attribute__((__ms_abi__)) +#else +#define EFIAPI /* TODO(jart): fix me */ +#endif + #define EFI_STATUS uint64_t #define EFI_EVENT uintptr_t #define EFI_HANDLE uintptr_t diff --git a/libc/rand/rngset.c b/libc/rand/rngset.c index 4b811a85c..9b4aba7a2 100644 --- a/libc/rand/rngset.c +++ b/libc/rand/rngset.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/calls.h" +#include "libc/dce.h" #include "libc/intrin/asan.internal.h" #include "libc/rand/rand.h" #include "libc/stdio/stdio.h" @@ -40,10 +41,13 @@ * * @return original buf */ -void *rngset(void *b, size_t n, uint64_t seed(void), size_t reseed) { +noasan void *rngset(void *b, size_t n, uint64_t seed(void), size_t reseed) { size_t m; uint64_t i, x, t = 0; unsigned char *p = b; + if (IsAsan()) { + __asan_check(b, n); + } if (!seed) { t = reseed; reseed = -1; diff --git a/libc/runtime/atexit.c b/libc/runtime/atexit.c index e93b10f95..11f0609c7 100644 --- a/libc/runtime/atexit.c +++ b/libc/runtime/atexit.c @@ -29,5 +29,5 @@ * @return 0 on success or nonzero if out of space */ int atexit(void f(void)) { - return __cxa_atexit(f, NULL, NULL); + return __cxa_atexit(f, 0, 0); } diff --git a/libc/runtime/cxaatexit.c b/libc/runtime/cxaatexit.c index 7e834bba3..688c6f311 100644 --- a/libc/runtime/cxaatexit.c +++ b/libc/runtime/cxaatexit.c @@ -51,7 +51,7 @@ static struct CxaAtexitBlocks { * @return 0 on success or nonzero w/ errno * @note folks have forked libc in past just to unbloat atexit() */ -int __cxa_atexit(void *fp, void *arg, void *pred) { +noasan int __cxa_atexit(void *fp, void *arg, void *pred) { unsigned i; struct CxaAtexitBlock *b, *b2; _Static_assert(ATEXIT_MAX == CHAR_BIT * sizeof(b->mask), ""); diff --git a/libc/runtime/hook.greg.c b/libc/runtime/hook.greg.c index 02f6ecee2..98e20e67e 100644 --- a/libc/runtime/hook.greg.c +++ b/libc/runtime/hook.greg.c @@ -21,9 +21,11 @@ #include "libc/calls/internal.h" #include "libc/calls/sigbits.h" #include "libc/calls/struct/sigset.h" +#include "libc/errno.h" #include "libc/log/libfatal.internal.h" #include "libc/runtime/runtime.h" #include "libc/runtime/symbols.internal.h" +#include "libc/str/str.h" #include "libc/sysv/consts/prot.h" /** @@ -44,8 +46,9 @@ * * @see ape/ape.lds */ -privileged noinstrument noasan void 
__hook(void *ifunc, - struct SymbolTable *symbols) { +privileged noinstrument noasan int __hook(void *ifunc, + struct SymbolTable *symbols) { + int rc; size_t i; char *p, *pe; intptr_t addr; @@ -57,10 +60,10 @@ privileged noinstrument noasan void __hook(void *ifunc, bool kIsBinaryAligned = !(kPrivilegedStart & (PAGESIZE - 1)); sigfillset(&mask); sigprocmask(SIG_BLOCK, &mask, &oldmask); - if (mprotect((void *)symbols->addr_base, - kPrivilegedStart - symbols->addr_base, - kIsBinaryAligned ? PROT_READ | PROT_WRITE - : PROT_READ | PROT_WRITE | PROT_EXEC) != -1) { + if ((rc = mprotect( + (void *)symbols->addr_base, kPrivilegedStart - symbols->addr_base, + kIsBinaryAligned ? PROT_READ | PROT_WRITE + : PROT_READ | PROT_WRITE | PROT_EXEC)) != -1) { for (i = 0; i < symbols->count; ++i) { if (symbols->addr_base + symbols->symbols[i].x < kProgramCodeStart) { continue; @@ -125,4 +128,5 @@ privileged noinstrument noasan void __hook(void *ifunc, PROT_READ | PROT_EXEC); } sigprocmask(SIG_SETMASK, &oldmask, NULL); + return rc; } diff --git a/libc/runtime/memtrack.internal.h b/libc/runtime/memtrack.internal.h index f34e95bfe..0090b0ffa 100644 --- a/libc/runtime/memtrack.internal.h +++ b/libc/runtime/memtrack.internal.h @@ -19,7 +19,7 @@ COSMOPOLITAN_C_START_ (!(IsWindows() && NtGetVersion() < kNtVersionWindows10) ? 
NORMAL : WIN7) #define kAutomapStart MEMTRACK_ADDRESS(_kAutomapStart, 0x10000000) #define kAutomapSize MEMTRACK_ADDRESS(_kAutomapSize, 0x40000000) -#define kFixedmapStart MEMTRACK_ADDRESS(_kFixedmapStart, 0x40000000) +#define kFixedmapStart MEMTRACK_ADDRESS(_kFixedmapStart, 0x50000000) struct MemoryInterval { int x; diff --git a/libc/runtime/symbols.internal.h b/libc/runtime/symbols.internal.h index 8f420b99e..a4400d084 100644 --- a/libc/runtime/symbols.internal.h +++ b/libc/runtime/symbols.internal.h @@ -26,7 +26,7 @@ const char *FindComBinary(void); const char *FindDebugBinary(void); struct SymbolTable *OpenSymbolTable(const char *); int CloseSymbolTable(struct SymbolTable **); -void __hook(void *, struct SymbolTable *); +int __hook(void *, struct SymbolTable *); forceinline int GetSymbol(struct SymbolTable *t, intptr_t a) { unsigned l, m, r, n, k; diff --git a/libc/str/djbsort.c b/libc/str/djbsort.c index e048cbf35..66dfe7c68 100644 --- a/libc/str/djbsort.c +++ b/libc/str/djbsort.c @@ -17,18 +17,55 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/alg/alg.h" +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" +#include "libc/nexgen32e/bsr.h" #include "libc/nexgen32e/nexgen32e.h" #include "libc/nexgen32e/x86feature.h" void djbsort_avx2(int32_t *, long); +static noinline void intsort(int *x, size_t n, size_t t) { + int a, b, c; + size_t i, p, q; + for (p = t; p > 0; p >>= 1) { + for (i = 0; i < n - p; ++i) { + if (!(i & p)) { + a = x[i + 0]; + b = x[i + p]; + if (a > b) c = a, a = b, b = c; + x[i + 0] = a; + x[i + p] = b; + } + } + for (q = t; q > p; q >>= 1) { + for (i = 0; i < n - q; ++i) { + if (!(i & p)) { + a = x[i + p]; + b = x[i + q]; + if (a > b) c = a, a = b, b = c; + x[i + p] = a; + x[i + q] = b; + } + } + } + } +} + /** * D.J. Bernstein's outrageously fast integer sorting algorithm. 
*/ void djbsort(int32_t *a, size_t n) { - if (X86_HAVE(AVX2)) { - djbsort_avx2(a, n); - } else { - insertionsort(a, n); + size_t m; + if (IsAsan()) { + if (__builtin_mul_overflow(n, 4, &m)) m = -1; + __asan_check(a, m); + } + if (n > 1) { + if (X86_HAVE(AVX2)) { + djbsort_avx2(a, n); + } else { + intsort(a, n, 1ul << bsrl(n - 1)); + } } } diff --git a/libc/str/iswseparator.c b/libc/str/iswseparator.c index 9f3bc0ff3..03cc4cfb9 100644 --- a/libc/str/iswseparator.c +++ b/libc/str/iswseparator.c @@ -390,14 +390,14 @@ static const unsigned kAstralCodes[][2] = { * other things like blocks and emoji (So). */ int iswseparator(wint_t c) { - int m, l, r; + int m, l, r, n; if (c < 0200) { return !(('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')); } if (c <= 0xffff) { l = 0; - r = sizeof(kCodes) / sizeof(kCodes[0]); + r = n = sizeof(kCodes) / sizeof(kCodes[0]); while (l < r) { m = (l + r) >> 1; if (kCodes[m][1] < c) { @@ -406,10 +406,10 @@ int iswseparator(wint_t c) { r = m; } } - return !(kCodes[l][0] <= c && c <= kCodes[l][1]); + return !(l < n && kCodes[l][0] <= c && c <= kCodes[l][1]); } else { l = 0; - r = sizeof(kAstralCodes) / sizeof(kAstralCodes[0]); + r = n = sizeof(kAstralCodes) / sizeof(kAstralCodes[0]); while (l < r) { m = (l + r) >> 1; if (kAstralCodes[m][1] < c) { @@ -418,6 +418,6 @@ int iswseparator(wint_t c) { r = m; } } - return !(kAstralCodes[l][0] <= c && c <= kAstralCodes[l][1]); + return !(l < n && kAstralCodes[l][0] <= c && c <= kAstralCodes[l][1]); } } diff --git a/net/http/khextoint.c b/libc/str/khextoint.c similarity index 99% rename from net/http/khextoint.c rename to libc/str/khextoint.c index 4c2339028..e0116f9af 100644 --- a/net/http/khextoint.c +++ b/libc/str/khextoint.c @@ -16,7 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "net/http/escape.h" +#include "libc/str/str.h" const signed char kHexToInt[256] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x00 diff --git a/libc/runtime/longsort.c b/libc/str/longsort.c similarity index 94% rename from libc/runtime/longsort.c rename to libc/str/longsort.c index 8caf147c7..bb3651be0 100644 --- a/libc/runtime/longsort.c +++ b/libc/str/longsort.c @@ -18,12 +18,14 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/dce.h" #include "libc/intrin/asan.internal.h" +#include "libc/macros.internal.h" #include "libc/nexgen32e/bsr.h" #include "libc/nexgen32e/x86feature.h" #include "libc/runtime/runtime.h" forceinline void longsorter(long *x, size_t n, size_t t) { - long a, b, c, p, q, i; + long a, b, c; + size_t i, p, q; for (p = t; p > 0; p >>= 1) { for (i = 0; i < n - p; ++i) { if (!(i & p)) { @@ -68,7 +70,10 @@ void longsort(long *x, size_t n) { } if (n > 1) { t = 1ul << bsrl(n - 1); - if (X86_HAVE(AVX2)) return longsort_avx2(x, n, t); - return longsort_pure(x, n, t); + if (X86_HAVE(AVX2)) { + longsort_avx2(x, n, t); + } else { + longsort_pure(x, n, t); + } } } diff --git a/libc/str/memccpy.c b/libc/str/memccpy.c index f517a9bff..5feb7e5c7 100644 --- a/libc/str/memccpy.c +++ b/libc/str/memccpy.c @@ -54,38 +54,12 @@ static inline noasan uint64_t UncheckedAlignedRead64(unsigned char *p) { */ void *memccpy(void *dst, const void *src, int c, size_t n) { size_t i; - uint64_t v, w; - unsigned char *d, *q; + unsigned char *d; const unsigned char *s; - i = 0; - d = dst; - s = src; - c &= 255; - v = 0x0101010101010101ul * c; - for (; (uintptr_t)(s + i) & 7; ++i) { - if (i == n) return NULL; - if ((d[i] = s[i]) == c) return d + i + 1; - } - for (; i + 8 <= n; i += 8) { - w = UncheckedAlignedRead64(s + i); - if (~(w ^ v) & ((w ^ v) - 0x0101010101010101) & 0x8080808080808080) { - break; - } else { - q = 
d + i; - q[0] = (w & 0x00000000000000ff) >> 000; - q[1] = (w & 0x000000000000ff00) >> 010; - q[2] = (w & 0x0000000000ff0000) >> 020; - q[3] = (w & 0x00000000ff000000) >> 030; - q[4] = (w & 0x000000ff00000000) >> 040; - q[5] = (w & 0x0000ff0000000000) >> 050; - q[6] = (w & 0x00ff000000000000) >> 060; - q[7] = (w & 0xff00000000000000) >> 070; - } - } - for (; i < n; ++i) { - if ((d[i] = s[i]) == c) { + for (d = dst, s = src, i = 0; i < n; ++i) { + if ((d[i] = s[i]) == (c & 255)) { return d + i + 1; } } - return NULL; + return 0; } diff --git a/libc/str/memchr.c b/libc/str/memchr.c index c44f48380..ffcc8116d 100644 --- a/libc/str/memchr.c +++ b/libc/str/memchr.c @@ -16,35 +16,64 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" +#include "libc/nexgen32e/x86feature.h" #include "libc/str/str.h" +typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1))); + +static inline const unsigned char *memchr_pure(const unsigned char *s, + unsigned char c, size_t n) { + size_t i; + for (i = 0; i < n; ++i) { + if (s[i] == c) { + return s + i; + } + } + return 0; +} + +noasan static inline const unsigned char *memchr_sse(const unsigned char *s, + unsigned char c, + size_t n) { + size_t i; + unsigned k; + unsigned m; + xmm_t v, *p; + xmm_t t = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c}; + for (; n >= 16; n -= 16, s += 16) { + v = *(const xmm_t *)s; + m = __builtin_ia32_pmovmskb128(v == t); + if (m) { + m = __builtin_ctzll(m); + return s + m; + } + } + for (i = 0; i < n; ++i) { + if (s[i] == c) { + return s + i; + } + } + return 0; +} + /** * Returns pointer to first instance of character. 
* - * @param m is memory to search + * @param s is memory to search * @param c is search byte which is masked with 255 * @param n is byte length of p * @return is pointer to first instance of c or NULL if not found * @asyncsignalsafe */ -void *memchr(const void *m, int c, size_t n) { - uint64_t v, w; - const char *p, *pe; - c &= 255; - v = 0x0101010101010101ul * c; - for (p = m, pe = p + n; p + 8 <= pe; p += 8) { - w = (uint64_t)(255 & p[7]) << 070 | (uint64_t)(255 & p[6]) << 060 | - (uint64_t)(255 & p[5]) << 050 | (uint64_t)(255 & p[4]) << 040 | - (uint64_t)(255 & p[3]) << 030 | (uint64_t)(255 & p[2]) << 020 | - (uint64_t)(255 & p[1]) << 010 | (uint64_t)(255 & p[0]) << 000; - if ((w = ~(w ^ v) & ((w ^ v) - 0x0101010101010101) & 0x8080808080808080)) { - return p + ((unsigned)__builtin_ctzll(w) >> 3); - } +void *memchr(const void *s, int c, size_t n) { + const void *r; + if (X86_HAVE(SSE)) { + if (IsAsan()) __asan_check(s, n); + r = memchr_sse(s, c, n); + } else { + r = memchr_pure(s, c, n); } - for (; p < pe; ++p) { - if ((*p & 255) == c) { - return p; - } - } - return NULL; + return (void *)r; } diff --git a/libc/runtime/qsort.c b/libc/str/qsort.c similarity index 100% rename from libc/runtime/qsort.c rename to libc/str/qsort.c diff --git a/libc/str/rawmemchr.c b/libc/str/rawmemchr.c index 8538fd611..a70225cbc 100644 --- a/libc/str/rawmemchr.c +++ b/libc/str/rawmemchr.c @@ -17,12 +17,39 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" +#include "libc/nexgen32e/x86feature.h" #include "libc/str/str.h" -static inline noasan uint64_t UncheckedAlignedRead64(unsigned char *p) { - return (uint64_t)p[7] << 070 | (uint64_t)p[6] << 060 | (uint64_t)p[5] << 050 | - (uint64_t)p[4] << 040 | (uint64_t)p[3] << 030 | (uint64_t)p[2] << 020 | - (uint64_t)p[1] << 010 | (uint64_t)p[0] << 000; +typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); + +static inline const unsigned char *rawmemchr_pure(const unsigned char *s, + unsigned char c) { + for (;; ++s) { + if (*s == c) { + return s; + } + } +} + +noasan static inline const char *rawmemchr_sse(const char *s, unsigned char c) { + unsigned k; + unsigned m; + xmm_t v, *p; + xmm_t n = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c}; + k = (uintptr_t)s & 15; + p = (const xmm_t *)((uintptr_t)s & -16); + v = *p; + m = __builtin_ia32_pmovmskb128(v == n); + m >>= k; + m <<= k; + while (!m) { + v = *++p; + m = __builtin_ia32_pmovmskb128(v == n); + } + m = __builtin_ctzll(m); + return (const char *)p + m; } /** @@ -32,22 +59,13 @@ static inline noasan uint64_t UncheckedAlignedRead64(unsigned char *p) { * @param c is search byte which is masked with 255 * @return is pointer to first instance of c */ -void *rawmemchr(const void *m, int c) { - uint64_t v, w; - const unsigned char *s; - s = m; - c &= 255; - v = 0x0101010101010101ul * c; - for (; (uintptr_t)s & 7; ++s) { - if (*s == c) return s; +void *rawmemchr(const void *s, int c) { + const void *r; + if (X86_HAVE(SSE)) { + if (IsAsan()) __asan_check(s, 1); + r = rawmemchr_sse(s, c); + } else { + r = rawmemchr_pure(s, c); } - for (;; s += 8) { - w = UncheckedAlignedRead64(s); - if ((w = ~(w ^ v) & ((w ^ v) - 0x0101010101010101) & 0x8080808080808080)) { - s += (unsigned)__builtin_ctzll(w) >> 3; - break; - } - } - assert(*s == c); - 
return s; + return (void *)r; } diff --git a/libc/str/str.h b/libc/str/str.h index 3cd9cdbba..1f63ad427 100644 --- a/libc/str/str.h +++ b/libc/str/str.h @@ -7,6 +7,7 @@ COSMOPOLITAN_C_START_ ╚────────────────────────────────────────────────────────────────────────────│─╝ fourth age telecommunications */ +extern const int8_t kHexToInt[256]; extern const uint8_t gperf_downcase[256]; extern const uint8_t kToLower[256]; extern const uint8_t kToUpper[256]; diff --git a/libc/str/strchr.c b/libc/str/strchr.c index 3e9e6fe93..653514c0a 100644 --- a/libc/str/strchr.c +++ b/libc/str/strchr.c @@ -17,37 +17,42 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" +#include "libc/nexgen32e/x86feature.h" #include "libc/str/str.h" -static noasan inline const char *strchr_x64(const char *p, uint64_t c) { - unsigned a, b; - uint64_t w, x, y; - for (c *= 0x0101010101010101;; p += 8) { - w = (uint64_t)(255 & p[7]) << 070 | (uint64_t)(255 & p[6]) << 060 | - (uint64_t)(255 & p[5]) << 050 | (uint64_t)(255 & p[4]) << 040 | - (uint64_t)(255 & p[3]) << 030 | (uint64_t)(255 & p[2]) << 020 | - (uint64_t)(255 & p[1]) << 010 | (uint64_t)(255 & p[0]) << 000; - if ((x = ~(w ^ c) & ((w ^ c) - 0x0101010101010101) & 0x8080808080808080) | - (y = ~w & (w - 0x0101010101010101) & 0x8080808080808080)) { - if (x) { - a = __builtin_ctzll(x); - if (y) { - b = __builtin_ctzll(y); - if (a <= b) { - return p + (a >> 3); - } else { - return 0; - } - } else { - return p + (a >> 3); - } - } else { - return 0; - } - } +typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); + +static inline const char *strchr_pure(const char *s, int c) { + for (;; ++s) { + if ((*s & 255) == (c & 255)) return s; + if (!*s) return 0; } } +noasan static inline const char *strchr_sse(const char *s, unsigned char c) { + unsigned k; + unsigned m; + xmm_t v, *p; + xmm_t 
z = {0}; + xmm_t n = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c}; + k = (uintptr_t)s & 15; + p = (const xmm_t *)((uintptr_t)s & -16); + v = *p; + m = __builtin_ia32_pmovmskb128((v == z) | (v == n)); + m >>= k; + m <<= k; + while (!m) { + v = *++p; + m = __builtin_ia32_pmovmskb128((v == z) | (v == n)); + } + m = __builtin_ctzl(m); + s = (const char *)p + m; + if (c && !*s) s = 0; + return s; +} + /** * Returns pointer to first instance of character. * @@ -58,12 +63,13 @@ static noasan inline const char *strchr_x64(const char *p, uint64_t c) { * @asyncsignalsafe */ char *strchr(const char *s, int c) { - char *r; - for (c &= 255; (uintptr_t)s & 7; ++s) { - if ((*s & 255) == c) return s; - if (!*s) return NULL; + const char *r; + if (X86_HAVE(SSE)) { + if (IsAsan()) __asan_check(s, 1); + r = strchr_sse(s, c); + } else { + r = strchr_pure(s, c); } - r = strchr_x64(s, c); - assert(!r || *r || !c); - return r; + assert(!r || *r || !(c & 255)); + return (char *)r; } diff --git a/libc/str/strchrnul.c b/libc/str/strchrnul.c index cde51302f..afaa0556d 100644 --- a/libc/str/strchrnul.c +++ b/libc/str/strchrnul.c @@ -17,38 +17,39 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" +#include "libc/nexgen32e/x86feature.h" #include "libc/str/str.h" -noasan static const char *strchrnul_x64(const char *p, uint64_t c) { - unsigned a, b; - uint64_t w, x, y; - for (c *= 0x0101010101010101;; p += 8) { - w = (uint64_t)(255 & p[7]) << 070 | (uint64_t)(255 & p[6]) << 060 | - (uint64_t)(255 & p[5]) << 050 | (uint64_t)(255 & p[4]) << 040 | - (uint64_t)(255 & p[3]) << 030 | (uint64_t)(255 & p[2]) << 020 | - (uint64_t)(255 & p[1]) << 010 | (uint64_t)(255 & p[0]) << 000; - if ((x = ~(w ^ c) & ((w ^ c) - 0x0101010101010101) & 0x8080808080808080) | - (y = ~w & (w - 0x0101010101010101) & 0x8080808080808080)) { - if (x) { - a = __builtin_ctzll(x); - if (y) { - b = __builtin_ctzll(y); - if (a <= b) { - return p + (a >> 3); - } else { - return p + (b >> 3); - } - } else { - return p + (a >> 3); - } - } else { - b = __builtin_ctzll(y); - return p + (b >> 3); - } - } +typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); + +static inline const char *strchrnul_pure(const char *s, int c) { + for (;; ++s) { + if ((*s & 255) == (c & 255)) return s; + if (!*s) return s; } } +noasan static inline const char *strchrnul_sse(const char *s, unsigned char c) { + unsigned k; + unsigned m; + xmm_t v, *p; + xmm_t z = {0}; + xmm_t n = {c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c}; + k = (uintptr_t)s & 15; + p = (const xmm_t *)((uintptr_t)s & -16); + v = *p; + m = __builtin_ia32_pmovmskb128((v == z) | (v == n)); + m >>= k; + m <<= k; + while (!m) { + v = *++p; + m = __builtin_ia32_pmovmskb128((v == z) | (v == n)); + } + return (const char *)p + __builtin_ctzl(m); +} + /** * Returns pointer to first instance of character. 
* @@ -58,12 +59,13 @@ noasan static const char *strchrnul_x64(const char *p, uint64_t c) { * NUL terminator if c is not found */ char *strchrnul(const char *s, int c) { - char *r; - for (c &= 255; (uintptr_t)s & 7; ++s) { - if ((*s & 0xff) == c) return s; - if (!*s) return s; + const char *r; + if (X86_HAVE(SSE)) { + if (IsAsan()) __asan_check(s, 1); + r = strchrnul_sse(s, c); + } else { + r = strchrnul_pure(s, c); } - r = strchrnul_x64(s, c); - assert((*r & 255) == c || !*r); - return r; + assert((*r & 255) == (c & 255) || !*r); + return (char *)r; } diff --git a/libc/str/strcmp.c b/libc/str/strcmp.c index e30824c6e..96aa0f319 100644 --- a/libc/str/strcmp.c +++ b/libc/str/strcmp.c @@ -34,9 +34,11 @@ static inline noasan uint64_t UncheckedAlignedRead64(const char *p) { * @asyncsignalsafe */ int strcmp(const char *a, const char *b) { + int c; size_t i = 0; uint64_t v, w, d; if (a == b) return 0; + if ((c = (*a & 255) - (*b & 255))) return c; if (((uintptr_t)a & 7) == ((uintptr_t)b & 7)) { for (; (uintptr_t)(a + i) & 7; ++i) { if (a[i] != b[i] || !b[i]) { diff --git a/libc/str/strpbrk.c b/libc/str/strpbrk.c index a14ac7cc0..c5de42ea1 100644 --- a/libc/str/strpbrk.c +++ b/libc/str/strpbrk.c @@ -16,7 +16,6 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/nexgen32e/hascharacter.internal.h" #include "libc/str/str.h" /** @@ -24,17 +23,21 @@ * @asyncsignalsafe */ char *strpbrk(const char *s, const char *accept) { - size_t i; + bool lut[256]; if (accept[0]) { if (!accept[1]) { return strchr(s, accept[0]); } else { - for (i = 0; s[i]; ++i) { - if (HasCharacter(s[i], accept)) { - return (/*unconst*/ char *)&s[i]; + memset(lut, 0, sizeof(lut)); + while (*accept) { + lut[*accept++ & 255] = true; + } + for (; *s; ++s) { + if (lut[*s & 255]) { + return (/*unconst*/ char *)s; } } } } - return NULL; + return 0; } diff --git a/libc/str/strstr.c b/libc/str/strstr.c index 4054e834e..93970318c 100644 --- a/libc/str/strstr.c +++ b/libc/str/strstr.c @@ -49,14 +49,12 @@ noasan char *strstr(const char *haystack, const char *needle) { k = (uintptr_t)haystack & 15; p = (const xmm_t *)((uintptr_t)haystack & -16); v = *p; - m = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(v, z) | - __builtin_ia32_pcmpeqb128(v, n)); + m = __builtin_ia32_pmovmskb128((v == z) | (v == n)); m >>= k; m <<= k; while (!m) { v = *++p; - m = __builtin_ia32_pmovmskb128(__builtin_ia32_pcmpeqb128(v, z) | - __builtin_ia32_pcmpeqb128(v, n)); + m = __builtin_ia32_pmovmskb128((v == z) | (v == n)); } haystack = (const char *)p + __builtin_ctzl(m); for (i = 0;; ++i) { diff --git a/libc/str/timingsafe_bcmp.c b/libc/str/timingsafe_bcmp.c index 1b89cd43f..0891f3d9d 100644 --- a/libc/str/timingsafe_bcmp.c +++ b/libc/str/timingsafe_bcmp.c @@ -18,14 +18,15 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/likely.h" #include "libc/dce.h" +#include "libc/intrin/asan.internal.h" #include "libc/nexgen32e/x86feature.h" #include "libc/str/str.h" typedef uint64_t xmm_t __attribute__((__vector_size__(16), __aligned__(1))); -static noinline antiquity unsigned timingsafe_bcmp_sse(const char *p, - const char *q, - size_t n) { +noasan 
static noinline antiquity unsigned timingsafe_bcmp_sse(const char *p, + const char *q, + size_t n) { uint64_t w; xmm_t a = {0}; while (n > 16 + 16) { @@ -40,9 +41,9 @@ static noinline antiquity unsigned timingsafe_bcmp_sse(const char *p, return w | w >> 32; } -microarchitecture("avx") static int timingsafe_bcmp_avx(const char *p, - const char *q, - size_t n) { +noasan static microarchitecture("avx") int timingsafe_bcmp_avx(const char *p, + const char *q, + size_t n) { uint64_t w; xmm_t a = {0}; if (n > 32) { @@ -134,10 +135,16 @@ int timingsafe_bcmp(const void *a, const void *b, size_t n) { __builtin_memcpy(&w3, q + n - 8, 8); w = (w0 ^ w1) | (w2 ^ w3); return w | w >> 32; - } else if (X86_HAVE(AVX)) { - return timingsafe_bcmp_avx(p, q, n); } else { - return timingsafe_bcmp_sse(p, q, n); + if (IsAsan()) { + __asan_check(a, n); + __asan_check(b, n); + } + if (X86_HAVE(AVX)) { + return timingsafe_bcmp_avx(p, q, n); + } else { + return timingsafe_bcmp_sse(p, q, n); + } } } else if (n >= 4) { __builtin_memcpy(&u0, p, 4); diff --git a/libc/str/towlower.c b/libc/str/towlower.c index 4b4c0ac94..fcea20604 100644 --- a/libc/str/towlower.c +++ b/libc/str/towlower.c @@ -177,7 +177,7 @@ static const int kAstralLower[][3] = { * Converts wide character to lower case. 
*/ wint_t towlower(wint_t c) { - int m, l, r; + int m, l, r, n; if (c < 0200) { if ('A' <= c && c <= 'Z') { return c + 32; @@ -199,7 +199,7 @@ wint_t towlower(wint_t c) { return c + 38864; /* 80x Ꭰ ..Ꮿ → ꭰ ..ꮿ Cherokee */ } else { l = 0; - r = sizeof(kLower) / sizeof(kLower[0]); + r = n = sizeof(kLower) / sizeof(kLower[0]); while (l < r) { m = (l + r) >> 1; if (kLower[m].y < c) { @@ -208,7 +208,7 @@ wint_t towlower(wint_t c) { r = m; } } - if (kLower[l].x <= c && c <= kLower[l].y) { + if (l < n && kLower[l].x <= c && c <= kLower[l].y) { return c + kLower[l].d; } else { return c; @@ -216,7 +216,7 @@ wint_t towlower(wint_t c) { } } else { l = 0; - r = sizeof(kAstralLower) / sizeof(kAstralLower[0]); + r = n = sizeof(kAstralLower) / sizeof(kAstralLower[0]); while (l < r) { m = (l + r) >> 1; if (kAstralLower[m][1] < c) { @@ -225,7 +225,7 @@ wint_t towlower(wint_t c) { r = m; } } - if (kAstralLower[l][0] <= c && c <= kAstralLower[l][1]) { + if (l < n && kAstralLower[l][0] <= c && c <= kAstralLower[l][1]) { return c + kAstralLower[l][2]; } else { return c; diff --git a/libc/str/towupper.c b/libc/str/towupper.c index 4bff81d50..26126f505 100644 --- a/libc/str/towupper.c +++ b/libc/str/towupper.c @@ -140,7 +140,7 @@ static const int kAstralUpper[][3] = { * Converts wide character to upper case. 
*/ wint_t towupper(wint_t c) { - int m, l, r; + int m, l, r, n; if (c < 0200) { if ('a' <= c && c <= 'z') { return c - 32; @@ -162,7 +162,7 @@ wint_t towupper(wint_t c) { return c - 38864; /* 80x ꭰ ..ꮿ → Ꭰ ..Ꮿ Cherokee Supplement */ } else { l = 0; - r = sizeof(kUpper) / sizeof(kUpper[0]); + r = n = sizeof(kUpper) / sizeof(kUpper[0]); while (l < r) { m = (l + r) >> 1; if (kUpper[m].y < c) { @@ -171,7 +171,7 @@ wint_t towupper(wint_t c) { r = m; } } - if (kUpper[l].x <= c && c <= kUpper[l].y) { + if (l < n && kUpper[l].x <= c && c <= kUpper[l].y) { return c + kUpper[l].d; } else { return c; @@ -179,7 +179,7 @@ wint_t towupper(wint_t c) { } } else { l = 0; - r = sizeof(kAstralUpper) / sizeof(kAstralUpper[0]); + r = n = sizeof(kAstralUpper) / sizeof(kAstralUpper[0]); while (l < r) { m = (l + r) >> 1; if (kAstralUpper[m][1] < c) { @@ -188,7 +188,7 @@ wint_t towupper(wint_t c) { r = m; } } - if (kAstralUpper[l][0] <= c && c <= kAstralUpper[l][1]) { + if (l < n && kAstralUpper[l][0] <= c && c <= kAstralUpper[l][1]) { return c + kAstralUpper[l][2]; } else { return c; diff --git a/libc/testlib/testmain.c b/libc/testlib/testmain.c index 78896089d..ef9e35e34 100644 --- a/libc/testlib/testmain.c +++ b/libc/testlib/testmain.c @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) { testlib_runalltests(); if (!g_testlib_failed && runbenchmarks_ && weaken(testlib_runallbenchmarks)) { weaken(testlib_runallbenchmarks)(); - if (!g_testlib_failed) { + if (!g_testlib_failed && IsRunningUnderMake()) { return 254; /* compile.com considers this 0 and propagates output */ } } diff --git a/libc/zipos/get.c b/libc/zipos/get.c index c8a15e917..e9b99be84 100644 --- a/libc/zipos/get.c +++ b/libc/zipos/get.c @@ -22,6 +22,7 @@ #include "libc/calls/struct/stat.h" #include "libc/errno.h" #include "libc/limits.h" +#include "libc/macros.internal.h" #include "libc/mem/alloca.h" #include "libc/runtime/runtime.h" #include "libc/str/str.h" @@ -33,6 +34,27 @@ #include "libc/zip.h" #include 
"libc/zipos/zipos.internal.h" +static uint64_t __zipos_get_min_offset(const uint8_t *base, + const uint8_t *cdir) { + uint64_t i, n, c, r, o; + c = GetZipCdirOffset(cdir); + n = GetZipCdirRecords(cdir); + for (r = c, i = 0; i < n; ++i, c += ZIP_CFILE_HDRSIZE(base + c)) { + o = GetZipCfileOffset(base + c); + if (o < r) r = o; + } + return r; +} + +static void __zipos_munmap_unneeded(const uint8_t *base, const uint8_t *cdir, + const uint8_t *map) { + uint64_t n; + n = __zipos_get_min_offset(base, cdir); + n += base - map; + n = ROUNDDOWN(n, FRAMESIZE); + if (n) munmap(map, n); +} + /** * Returns pointer to zip central directory of current executable. */ @@ -40,30 +62,31 @@ struct Zipos *__zipos_get(void) { static bool once; static struct Zipos zipos; int fd; - size_t n; char *path; + size_t size; sigset_t neu, old; - uint8_t *p, *base, *cdir; + uint8_t *map, *base, *cdir; if (!once) { sigfillset(&neu); sigprocmask(SIG_BLOCK, &neu, &old); if ((fd = open(program_executable_name, O_RDONLY)) != -1) { - if ((n = getfiledescriptorsize(fd)) != SIZE_MAX && - (p = mmap(0, n, PROT_READ, MAP_SHARED, fd, 0)) != MAP_FAILED) { + if ((size = getfiledescriptorsize(fd)) != SIZE_MAX && + (map = mmap(0, size, PROT_READ, MAP_SHARED, fd, 0)) != MAP_FAILED) { if (endswith(program_executable_name, ".com.dbg")) { - if ((base = memmem(p, n, "MZqFpD", 6))) { - n -= base - p; + if ((base = memmem(map, size, "MZqFpD", 6))) { + size -= base - map; } else { - base = p; + base = map; } } else { - base = p; + base = map; } - if ((cdir = GetZipCdir(base, n))) { + if ((cdir = GetZipCdir(base, size))) { + __zipos_munmap_unneeded(base, cdir, map); zipos.map = base; zipos.cdir = cdir; } else { - munmap(p, n); + munmap(map, size); ZTRACE("__zipos_get(%s) → eocd not found", program_executable_name); } } else { diff --git a/net/http/escape.h b/net/http/escape.h index 665ebb0de..79b14cc04 100644 --- a/net/http/escape.h +++ b/net/http/escape.h @@ -8,7 +8,6 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) 
COSMOPOLITAN_C_START_ -extern const signed char kHexToInt[256]; extern const char kEscapeAuthority[256]; extern const char kEscapeIp[256]; extern const char kEscapePath[256]; diff --git a/test/libc/fmt/atoi_test.c b/test/libc/fmt/atoi_test.c index 208e1b1de..b7d808cd0 100644 --- a/test/libc/fmt/atoi_test.c +++ b/test/libc/fmt/atoi_test.c @@ -337,6 +337,7 @@ TEST(strtoumax, testZero) { } TEST(strtoumax, testDecimal) { EXPECT_EQ(123, strtoumax("123", NULL, 0)); + EXPECT_EQ(-123, strtoumax("-123", NULL, 0)); } TEST(strtoumax, testHex) { EXPECT_EQ(255, strtoumax("0xff", NULL, 0)); diff --git a/test/libc/intrin/memmove_test.c b/test/libc/intrin/memmove_test.c index 38e940eb4..90a5d61b9 100644 --- a/test/libc/intrin/memmove_test.c +++ b/test/libc/intrin/memmove_test.c @@ -68,8 +68,8 @@ TEST(memmove, bighug) { int N[] = {5 * 1024 * 1024}; a = gc(malloc(6291456)); b = gc(malloc(6291456)); - for (o1 = 0; o1 < 40; o1 += 10) { - for (o2 = 0; o2 < 40; o2 += 10) { + for (o1 = 0; o1 < 40; o1 += 20) { + for (o2 = 0; o2 < 40; o2 += 20) { for (i = 0; i < ARRAYLEN(N); ++i) { rngset(a, 6291456, 0, 0); memcpy(b, a, 6291456); diff --git a/test/libc/intrin/memset_test.c b/test/libc/intrin/memset_test.c index 53a0b8e8b..7edaa9fa9 100644 --- a/test/libc/intrin/memset_test.c +++ b/test/libc/intrin/memset_test.c @@ -16,6 +16,8 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" #include "libc/mem/mem.h" #include "libc/rand/rand.h" #include "libc/runtime/gc.internal.h" @@ -23,8 +25,9 @@ #include "libc/testlib/ezbench.h" #include "libc/testlib/testlib.h" -static void *golden(void *p, int c, size_t n) { +static noasan void *golden(void *p, int c, size_t n) { size_t i; + if (IsAsan()) __asan_check(p, n); for (i = 0; i < n; ++i) ((char *)p)[i] = c; return p; } @@ -32,18 +35,16 @@ static void *golden(void *p, int c, size_t n) { TEST(memset, hug) { char *a, *b; int i, j, c; + a = malloc(1025 * 2); + b = malloc(1025 * 2); for (i = 0; i < 1025; ++i) { for (j = 0; j < 1025 - i; ++j) { - a = malloc(i + j); - b = malloc(i + j); c = vigna(); - rngset(a, i + j, vigna, 0); + rngset(a, i + j, 0, 0); memcpy(b, a, i + j); ASSERT_EQ(a + i, golden(a + i, c, j)); ASSERT_EQ(b + i, memset(b + i, c, j)); ASSERT_EQ(0, timingsafe_bcmp(a, b, i + j)); - free(b); - free(a); } } } @@ -51,17 +52,15 @@ TEST(memset, hug) { TEST(bzero, hug) { char *a, *b; int i, j; + a = malloc(1025 * 2); + b = malloc(1025 * 2); for (i = 0; i < 1025; ++i) { for (j = 0; j < 1025 - i; ++j) { - a = malloc(i + j); - b = malloc(i + j); - rngset(a, i + j, vigna, 0); + rngset(a, i + j, 0, 0); memcpy(b, a, i + j); golden(a + i, 0, j); bzero(b + i, j); ASSERT_EQ(0, timingsafe_bcmp(a, b, i + j)); - free(b); - free(a); } } } diff --git a/test/libc/mem/arena_test.c b/test/libc/mem/arena_test.c new file mode 100644 index 000000000..c1f3debf5 --- /dev/null +++ b/test/libc/mem/arena_test.c @@ -0,0 +1,65 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any 
purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/mem/arena.h" +#include "libc/mem/mem.h" +#include "libc/testlib/ezbench.h" +#include "libc/testlib/testlib.h" + +TEST(arena, test) { + EXPECT_STREQ("hello", gc(strdup("hello"))); + __arena_push(); + EXPECT_STREQ("hello", strdup("hello")); + __arena_push(); + EXPECT_STREQ("hello", strdup("hello")); + for (int i = 0; i < 5000; ++i) { + EXPECT_STREQ("hello", strdup("hello")); + } + free(strdup("hello")); + __arena_pop(); + EXPECT_STREQ("", calloc(1, 16)); + EXPECT_STREQ("hello", strdup("hello")); + __arena_pop(); +} + +void *calloc_(size_t, size_t) asm("calloc"); + +void A(size_t n) { + __arena_push(); + for (int i = 0; i < n; ++i) { + calloc_(15, 1); + } + __arena_pop(); +} + +void B(size_t n) { + void **P; + P = malloc(n * sizeof(void *)); + for (int i = 0; i < n; ++i) { + P[i] = calloc_(15, 1); + } + bulk_free(P, n); + free(P); +} + +BENCH(arena, bench) { + EZBENCH2("A 100", donothing, A(100)); + EZBENCH2("B 100", donothing, B(100)); + EZBENCH2("A 5000", donothing, A(5000)); + EZBENCH2("B 5000", donothing, B(5000)); +} diff --git a/test/libc/mem/malloc_test.c b/test/libc/mem/malloc_test.c index 1588262e7..cf87b23f6 100644 --- a/test/libc/mem/malloc_test.c +++ b/test/libc/mem/malloc_test.c @@ -30,6 +30,7 @@ 
#include "libc/sysv/consts/map.h" #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/prot.h" +#include "libc/testlib/ezbench.h" #include "libc/testlib/testlib.h" #define N 1024 @@ -82,3 +83,25 @@ TEST(malloc, test) { for (i = 0; i < ARRAYLEN(fds); ++i) close(fds[i]); malloc_trim(0); } + +void *bulk[1024]; + +void BulkFreeBenchSetup(void) { + size_t i; + for (i = 0; i < ARRAYLEN(bulk); ++i) { + bulk[i] = malloc(rand() % 64); + } +} + +void FreeBulk(void) { + size_t i; + for (i = 0; i < ARRAYLEN(bulk); ++i) { + free(bulk[i]); + } +} + +BENCH(bulk_free, bench) { + EZBENCH2("free() bulk", BulkFreeBenchSetup(), FreeBulk()); + EZBENCH2("bulk_free()", BulkFreeBenchSetup(), + bulk_free(bulk, ARRAYLEN(bulk))); +} diff --git a/test/libc/mem/test.mk b/test/libc/mem/test.mk index dd1120293..5402d35df 100644 --- a/test/libc/mem/test.mk +++ b/test/libc/mem/test.mk @@ -33,7 +33,8 @@ TEST_LIBC_MEM_DIRECTDEPS = \ LIBC_STR \ LIBC_STUBS \ LIBC_SYSV \ - LIBC_TESTLIB + LIBC_TESTLIB \ + THIRD_PARTY_DLMALLOC TEST_LIBC_MEM_DEPS := \ $(call uniq,$(foreach x,$(TEST_LIBC_MEM_DIRECTDEPS),$($(x)))) diff --git a/test/libc/nexgen32e/strsak32_test.c b/test/libc/nexgen32e/strsak32_test.c index a5de98424..c7ccb6055 100644 --- a/test/libc/nexgen32e/strsak32_test.c +++ b/test/libc/nexgen32e/strsak32_test.c @@ -17,11 +17,27 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/nexgen32e/nexgen32e.h" +#include "libc/runtime/gc.internal.h" #include "libc/str/str.h" +#include "libc/testlib/ezbench.h" +#include "libc/testlib/hyperion.h" #include "libc/testlib/testlib.h" +#include "libc/x/x.h" TEST(strsak32, test) { EXPECT_EQ(0, wcslen(L"")); EXPECT_EQ(1, wcslen(L"1")); EXPECT_EQ(5, wcslen(L"hello")); } + +BENCH(strsak32, bench) { + size_t wcslen_(const wchar_t *) asm("wcslen"); + wchar_t *p = gc(utf8toutf32(kHyperion, kHyperionSize, 0)); + EZBENCH_N("wcslen", kHyperionSize, wcslen_(p)); + for (int i = 128; i >= 2; i /= 2) { + p[i - 0] = 0; + EZBENCH_N("wcslen", i - 0, wcslen_(p)); + p[i - 1] = 0; + EZBENCH_N("wcslen", i - 1, wcslen_(p)); + } +} diff --git a/test/libc/sock/poll_test.c b/test/libc/sock/poll_test.c index 5ae0c06fe..da9b543f5 100644 --- a/test/libc/sock/poll_test.c +++ b/test/libc/sock/poll_test.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/log/libfatal.internal.h" #include "libc/runtime/gc.internal.h" #include "libc/sock/sock.h" #include "libc/sysv/consts/af.h" diff --git a/test/libc/str/blake2_test.c b/test/libc/str/blake2_test.c index 9cc34bcbd..4c02081a2 100644 --- a/test/libc/str/blake2_test.c +++ b/test/libc/str/blake2_test.c @@ -24,25 +24,6 @@ #include "libc/testlib/hyperion.h" #include "libc/testlib/testlib.h" -const signed char kHexToInt[256] = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x00 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x10 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x20 - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 0x30 - -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x40 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x50 - -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x60 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x70 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x80 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x90 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xa0 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xb0 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xc0 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xd0 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xe0 - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xf0 -}; - uint8_t *EZBLAKE2B256(const char *s, size_t n) { static uint8_t digest[BLAKE2B256_DIGEST_LENGTH]; BLAKE2B256(s, n, digest); diff --git a/test/libc/str/crc32c_test.c b/test/libc/str/crc32c_test.c index f468bf2e0..939748048 100644 --- a/test/libc/str/crc32c_test.c +++ b/test/libc/str/crc32c_test.c 
@@ -22,6 +22,7 @@ #include "libc/nexgen32e/crc32.h" #include "libc/nexgen32e/x86feature.h" #include "libc/runtime/gc.internal.h" +#include "libc/stdio/stdio.h" #include "libc/str/str.h" #include "libc/testlib/ezbench.h" #include "libc/testlib/hyperion.h" @@ -49,7 +50,38 @@ TEST(crc32c, test) { EXPECT_EQ(0xecc9871d, crc32c(0, kHyperion, kHyperionSize)); } -BENCH(crc32c, bench) { - EZBENCH2("crc32c", donothing, crc32c(0, kHyperion, kHyperionSize)); - EZBENCH2("crc32_z", donothing, crc32_z(0, kHyperion, kHyperionSize)); +noinline uint64_t fnv_hash(char *s, int len) { + uint64_t hash = 0xcbf29ce484222325; + for (int i = 0; i < len; i++) { + hash *= 0x100000001b3; + hash ^= (unsigned char)s[i]; + } + return hash; +} + +static unsigned KMH(const void *p, unsigned long n) { + unsigned h, i; + for (h = i = 0; i < n; i++) { + h += ((unsigned char *)p)[i]; + h *= 0x9e3779b1; + } + return MAX(1, h); +} + +BENCH(crc32c, bench) { + for (int i = 1; i < 256; i *= 2) { + EZBENCH_N("crc32c", i, crc32c(0, kHyperion, i)); + EZBENCH_N("crc32_z", i, crc32_z(0, kHyperion, i)); + EZBENCH_N("fnv_hash", i, + EXPROPRIATE(fnv_hash(VEIL("r", kHyperion), VEIL("r", i)))); + EZBENCH_N("KMH", i, EXPROPRIATE(KMH(VEIL("r", kHyperion), VEIL("r", i)))); + fprintf(stderr, "\n"); + } + EZBENCH_N("crc32c", kHyperionSize, crc32c(0, kHyperion, kHyperionSize)); + EZBENCH_N("crc32_z", kHyperionSize, crc32_z(0, kHyperion, kHyperionSize)); + EZBENCH_N( + "fnv_hash", kHyperionSize, + EXPROPRIATE(fnv_hash(VEIL("r", kHyperion), VEIL("r", kHyperionSize)))); + EZBENCH_N("KMH", kHyperionSize, + EXPROPRIATE(KMH(VEIL("r", kHyperion), VEIL("r", kHyperionSize)))); } diff --git a/test/libc/str/longsort_test.c b/test/libc/str/longsort_test.c new file mode 100644 index 000000000..a3b213528 --- /dev/null +++ b/test/libc/str/longsort_test.c @@ -0,0 +1,51 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ 
+╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/alg/alg.h" +#include "libc/rand/rand.h" +#include "libc/runtime/runtime.h" +#include "libc/str/str.h" +#include "libc/testlib/ezbench.h" +#include "libc/testlib/testlib.h" + +int CompareLong(const void *a, const void *b) { + const long *x = a; + const long *y = b; + if (*x < *y) return -1; + if (*x > *y) return +1; + return 0; +} + +TEST(longsort, test) { + size_t n = 5000; + long *a = gc(calloc(n, sizeof(long))); + long *b = gc(calloc(n, sizeof(long))); + rngset(a, n * sizeof(long), 0, 0); + memcpy(b, a, n * sizeof(long)); + qsort(a, n, sizeof(long), CompareLong); + longsort(b, n); + ASSERT_EQ(0, memcmp(b, a, n * sizeof(long))); +} + +BENCH(longsort, bench) { + size_t n = 1000; + long *p1 = gc(malloc(n * sizeof(long))); + long *p2 = gc(malloc(n * sizeof(long))); + rngset(p1, n * sizeof(long), 0, 0); + EZBENCH2("longsort", memcpy(p2, p1, n * sizeof(long)), longsort(p2, n)); +} diff --git a/test/libc/runtime/qsort_test.c b/test/libc/str/qsort_test.c similarity index 84% rename from 
test/libc/runtime/qsort_test.c rename to test/libc/str/qsort_test.c index 2b83375cd..869249e80 100644 --- a/test/libc/runtime/qsort_test.c +++ b/test/libc/str/qsort_test.c @@ -35,25 +35,6 @@ int CompareLong(const void *a, const void *b) { return 0; } -unsigned long doge(unsigned long x) { - unsigned long t = 1; - while (t < x - t) { - t += t; - } - return t; -} - -unsigned long B(unsigned long x) { - return 1ul << bsrl(x - 1); -} - -TEST(eh, eu) { - int i; - for (i = 2; i < 9999; ++i) { - ASSERT_EQ(doge(i), B(i), "%d", i); - } -} - TEST(qsort, test) { const int32_t A[][2] = {{4, 'a'}, {65, 'b'}, {2, 'c'}, {-31, 'd'}, {0, 'e'}, {99, 'f'}, {2, 'g'}, {83, 'h'}, @@ -68,17 +49,6 @@ TEST(qsort, test) { free(M); } -TEST(longsort, test) { - size_t n = 5000; - long *a = gc(calloc(n, sizeof(long))); - long *b = gc(calloc(n, sizeof(long))); - rngset(a, n * sizeof(long), 0, 0); - memcpy(b, a, n * sizeof(long)); - qsort(a, n, sizeof(long), CompareLong); - longsort(b, n); - ASSERT_EQ(0, memcmp(b, a, n * sizeof(long))); -} - BENCH(qsort, bench) { size_t n = 1000; long *p1 = gc(malloc(n * sizeof(long))); diff --git a/test/libc/str/strchr_test.c b/test/libc/str/strchr_test.c index 214a82a4d..7c0e5f1f7 100644 --- a/test/libc/str/strchr_test.c +++ b/test/libc/str/strchr_test.c @@ -21,6 +21,7 @@ #include "libc/rand/rand.h" #include "libc/str/str.h" #include "libc/testlib/ezbench.h" +#include "libc/testlib/hyperion.h" #include "libc/testlib/testlib.h" TEST(strchr, blank) { @@ -84,8 +85,8 @@ TEST(strchr, fuzz) { p = calloc(1, 64); for (i = -2; i < 257; ++i) { for (j = 0; j < 17; ++j) { - rngset(p, 63, rand64, -1); - ASSERT_EQ(strchr(p + j, i), strchr_pure(p + j, i)); + rngset(p, 63, rdseed, -1); + ASSERT_EQ(strchr_pure(p + j, i), strchr(p + j, i)); } } free(p); @@ -165,3 +166,20 @@ TEST(rawmemchr, fuzz) { } free(p); } + +BENCH(strchr, bench2) { + char *strchr_(const char *, int) asm("strchr"); + char *strchrnul_(const char *, int) asm("strchrnul"); + char *memchr_(const char *, int, 
size_t) asm("memchr"); + char *strlen_(const char *) asm("strlen"); + char *rawmemchr_(const char *, int) asm("rawmemchr"); + EZBENCH2("strchr z", donothing, strchr_(kHyperion, 'z')); + EZBENCH2("rawmemchr z", donothing, rawmemchr_(kHyperion, 'z')); + EZBENCH2("memchr z", donothing, memchr_(kHyperion, 'z', kHyperionSize)); + EZBENCH2("strchr Z", donothing, strchr_(kHyperion, 'Z')); + EZBENCH2("rawmemchr \\0", donothing, rawmemchr_(kHyperion, 0)); + EZBENCH2("strlen", donothing, strlen_(kHyperion)); + EZBENCH2("memchr Z", donothing, memchr_(kHyperion, 'Z', kHyperionSize)); + EZBENCH2("strchrnul z", donothing, strchrnul_(kHyperion, 'z')); + EZBENCH2("strchrnul Z", donothing, strchrnul_(kHyperion, 'Z')); +} diff --git a/test/libc/str/strpbrk_test.c b/test/libc/str/strpbrk_test.c new file mode 100644 index 000000000..e1521a3fd --- /dev/null +++ b/test/libc/str/strpbrk_test.c @@ -0,0 +1,36 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/str/str.h" +#include "libc/testlib/ezbench.h" +#include "libc/testlib/hyperion.h" +#include "libc/testlib/testlib.h" + +TEST(strpbrk, test) { + EXPECT_STREQ("o", strpbrk("hello", "abco")); + EXPECT_EQ(NULL, strpbrk("hello", "ABCO")); +} + +BENCH(strpbrk, bench) { + char *strchr_(const char *, int) asm("strchr"); + char *strpbrk_(const char *, const char *) asm("strpbrk"); + EZBENCH2("strchr", donothing, strchr_(kHyperion, 'z')); + EZBENCH2("strpbrk 1", donothing, strpbrk_(kHyperion, "z")); + EZBENCH2("strpbrk 2", donothing, strpbrk_(kHyperion, "Zz")); + EZBENCH2("strpbrk 10", donothing, strpbrk_(kHyperion, ">@#\6\3\2\5\6Zz")); +} diff --git a/third_party/chibicc/README.cosmo b/third_party/chibicc/README.cosmo index bb5ce109c..feca45a91 100644 --- a/third_party/chibicc/README.cosmo +++ b/third_party/chibicc/README.cosmo @@ -6,6 +6,7 @@ which is great, considering it's a 220kb αcτµαlly pδrταblε εxεcµταb local enhancements +- add assembler - support dce - support gnu asm - support __int128 @@ -24,6 +25,7 @@ local enhancements - reduce #lines of generated assembly by a third - reduce #bytes of generated binary by a third - report divide errors in constexprs +- use perfect hash table for keywords local bug fixes diff --git a/third_party/chibicc/as.c b/third_party/chibicc/as.c index 0fb3e5cdb..a19a00a89 100644 --- a/third_party/chibicc/as.c +++ b/third_party/chibicc/as.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/bits.h" +#include "libc/bits/popcnt.h" #include "libc/calls/calls.h" #include "libc/calls/struct/stat.h" #include "libc/elf/def.h" @@ -26,6 +27,7 @@ #include "libc/macros.internal.h" #include "libc/mem/mem.h" #include "libc/nexgen32e/bsr.h" +#include "libc/nexgen32e/crc32.h" #include "libc/runtime/runtime.h" #include "libc/stdio/stdio.h" #include "libc/str/str.h" @@ -35,93 +37,6 @@ #include "third_party/gdtoa/gdtoa.h" #include "tool/build/lib/elfwriter.h" -/** - * @fileoverview Assembler - * - * This program turns assembly into relocatable NexGen32e ELF objects. - * That process is normally an implementation detail of your compiler, - * which can embed this program or launch it as a subprocess. Much GNU - * style syntax is supported. Your code that gets embedded in an asm() - * statement will ultimately end up here. This implementation, has the - * advantage of behaving the same across platforms, in a simple single - * file implementation that compiles down to a 100kilo ape executable. - * - * Your assembler supports the following flags: - * - * -o FILE output path [default: a.out] - * -I DIR append include path [default: .] - * -W inhibit .warning - * -Z inhibit .error and .err - * - * Your assembler supports the following directives: - * - * .zero INT... emits int8 - * .word INT... emits int16 - * .long INT... emits int32 - * .quad INT... emits int64 - * .ascii STR... emits string - * .asciz STR... emits string and 0 byte - * .ident STR emits string to .comment section - * .float NUMBER... emits binary32 - * .double NUMBER... emits binary64 - * .float80 NUMBER... emits x86 float (10 bytes) - * .ldbl NUMBER... emits x86 float (16 bytes) - * .sleb128 NUMBER... emits LEB-128 signed varint - * .uleb128 NUMBER... 
emits LEB-128 unsigned varint - * .align BYTES [FILL [MAXSKIP]] emits fill bytes to boundary - * .end halts tokenization - * .abort crashes assembler - * .err aborts (ignorable w/ -Z) - * .error STR aborts (ignorable w/ -Z) - * .warning STR whines (ignorable w/ -W) - * .text enters text section (default) - * .data enters data section - * .bss enters bss section - * .section NAME [SFLG SHT] enters section - * .previous enters previous section - * .pushsection NAME [SFLG SHT] pushes section - * .popsection pops section - * .type SYM TYPE sets type of symbol - * .size SYM EXPR sets size of symbol - * .internal SYM... marks symbol STV_INTERNAL - * .hidden SYM... marks symbol STV_HIDDEN - * .protected SYM... marks symbol STV_PROTECTED - * .globl SYM... marks symbol STB_GLOBAL - * .local SYM... marks symbol STB_LOCAL - * .weak SYM... marks symbol STB_WEAK - * .include FILE assembles file source - * .incbin FILE emits file content - * .file FILENO PATH dwarf file define - * .loc FILENO LINENO dwarf source line - * - * TYPE can be one of the following: - * - * - @notype STT_NOTYPE (default) - * - @object STT_OBJECT - * - @function STT_FUNC - * - @common STT_COMMON - * - @tls_object STT_TLS - * - * SHT can be one of the following: - * - * - @progbits SHT_PROGBITS - * - @note SHT_NOTE - * - @nobits SHT_NOBITS - * - @preinit_array SHT_PREINIT_ARRAY - * - @init_array SHT_INIT_ARRAY - * - @fini_array SHT_FINI_ARRAY - * - * SFLG is a string which may have the following characters: - * - * - a SHF_ALLOC - * - w SHF_WRITE - * - x SHF_EXECINSTR - * - g SHF_GROUP - * - M SHF_MERGE - * - S SHF_STRINGS - * - T SHF_TLS - */ - #define OSZ 0x66 #define ASZ 0x67 #define REX 0x40 // byte @@ -144,7 +59,7 @@ #define IS(P, N, S) (N == sizeof(S) - 1 && !strncasecmp(P, S, sizeof(S) - 1)) #define MAX(X, Y) ((Y) < (X) ? 
(X) : (Y)) -#define READ128BE(S) ((unsigned __int128)READ64BE(S) << 64 | READ64BE((S) + 8)) +#define READ128BE(S) ((uint128_t)READ64BE(S) << 64 | READ64BE((S) + 8)) struct As { int i; // things @@ -158,7 +73,7 @@ struct As { bool inhibitwarn; struct Ints { unsigned long n, c; - long *p; + int128_t *p; } ints; struct Floats { unsigned long n, c; @@ -276,7 +191,7 @@ struct As { unsigned tok; int lhs; int rhs; - long x; + int128_t x; bool isvisited; bool isevaluated; } * p; @@ -431,14 +346,8 @@ static const struct Reg { {"xmm9", 1 | 4<<3 | REXR<<8, 1 | 4<<3 | REXB<<8, -1, -1 }, } /* clang-format on */; -static unsigned Hash(const void *p, unsigned long n) { - unsigned h, i; - for (h = i = 0; i < n; i++) { - h += ((unsigned char *)p)[i]; - h *= 0x9e3779b1; - } - return MAX(1, h); -} +long as_hashmap_hits; +long as_hashmap_miss; static bool IsPunctMergeable(int c) { switch (c) { @@ -586,6 +495,7 @@ static void ReadFlags(struct As *a, int argc, char *argv[]) { } static int ReadCharLiteral(struct Slice *buf, int c, char *p, int *i) { + int x; if (c != '\\') return c; switch ((c = p[(*i)++])) { case 'a': @@ -605,10 +515,10 @@ static int ReadCharLiteral(struct Slice *buf, int c, char *p, int *i) { case 'e': return 033; case 'x': - if (isxdigit(p[*i])) { - c = hextoint(p[(*i)++]); - if (isxdigit(p[*i])) { - c = c * 16 + hextoint(p[(*i)++]); + if ((x = kHexToInt[p[*i] & 255]) != -1) { + *i += 1, c = x; + if ((x = kHexToInt[p[*i] & 255]) != -1) { + *i += 1, c = c << 4 | x; } } return c; @@ -670,6 +580,10 @@ static void Tokenize(struct As *a, int path) { char *p, *path2; struct Slice buf; bool bol, isfloat, isfpu; + if (!fileexists(a->strings.p[path])) { + fprintf(stderr, "%s: file not found\n", a->strings.p[path]); + exit(1); + } p = SaveString(&a->strings, read_file(a->strings.p[path])); p = skip_bom(p); canonicalize_newline(p); @@ -779,7 +693,7 @@ static void Tokenize(struct As *a, int path) { a->things.p[a->things.n - 1].t = TT_FLOAT; } else { APPEND(a->ints); - 
a->ints.p[a->ints.n - 1] = strtoul(p, NULL, 0); + a->ints.p[a->ints.n - 1] = strtoumax(p, NULL, 0); a->things.p[a->things.n - 1].i = a->ints.n - 1; if (p[i] == 'f' || p[i] == 'F') { a->things.p[a->things.n - 1].t = TT_FORWARD; @@ -859,22 +773,28 @@ static void Tokenize(struct As *a, int path) { static int GetSymbol(struct As *a, int name) { struct HashEntry *p; unsigned i, j, k, n, m, h, n2; - h = Hash(a->slices.p[name].p, a->slices.p[name].n); + if (!(h = crc32c(0, a->slices.p[name].p, a->slices.p[name].n))) h = 1; n = a->symbolindex.n; i = 0; if (n) { k = 0; - do { + for (;;) { i = (h + k + ((k + 1) >> 1)) & (n - 1); if (a->symbolindex.p[i].h == h && a->slices.p[a->symbols.p[a->symbolindex.p[i].i].name].n == a->slices.p[name].n && !memcmp(a->slices.p[a->symbols.p[a->symbolindex.p[i].i].name].p, a->slices.p[name].p, a->slices.p[name].n)) { + ++as_hashmap_hits; return a->symbolindex.p[i].i; } - ++k; - } while (a->symbolindex.p[i].h); + if (!a->symbolindex.p[i].h) { + break; + } else { + ++k; + ++as_hashmap_miss; + } + } } if (++a->symbolindex.i >= (n >> 1)) { m = n ? 
n << 1 : 16; @@ -983,7 +903,7 @@ static void ConsumeComma(struct As *a) { ConsumePunct(a, ','); } -static int NewPrimary(struct As *a, enum ExprKind k, long x) { +static int NewPrimary(struct As *a, enum ExprKind k, int128_t x) { AppendExpr(a); a->exprs.p[a->exprs.n - 1].kind = k; a->exprs.p[a->exprs.n - 1].x = x; @@ -1299,7 +1219,7 @@ static int Parse(struct As *a) { return ParseOr(a, &a->i, a->i); } -static long GetInt(struct As *a) { +static int128_t GetInt(struct As *a) { int x; x = Parse(a); if (a->exprs.p[x].kind == EX_INT) { @@ -1331,7 +1251,7 @@ static struct Slice GetSlice(struct As *a) { } } -static void EmitData(struct As *a, const void *p, unsigned long n) { +static void EmitData(struct As *a, const void *p, uint128_t n) { struct Slice *s; s = &a->sections.p[a->section].binary; s->p = realloc(s->p, s->n + n); @@ -1339,41 +1259,50 @@ static void EmitData(struct As *a, const void *p, unsigned long n) { s->n += n; } -static void EmitByte(struct As *a, unsigned long x) { +static void EmitByte(struct As *a, uint128_t i) { + uint8_t x = i; unsigned char b[1]; - b[0] = x >> 000; + b[0] = (x & 0xff) >> 000; EmitData(a, b, 1); } -static void EmitWord(struct As *a, unsigned long x) { +static void EmitWord(struct As *a, uint128_t i) { + uint16_t x = i; unsigned char b[2]; - b[0] = x >> 000; - b[1] = x >> 010; + b[0] = (x & 0x00ff) >> 000; + b[1] = (x & 0xff00) >> 010; EmitData(a, b, 2); } -static void EmitLong(struct As *a, unsigned long x) { +static void EmitLong(struct As *a, uint128_t i) { + uint32_t x = i; unsigned char b[4]; - b[0] = x >> 000; - b[1] = x >> 010; - b[2] = x >> 020; - b[3] = x >> 030; + b[0] = (x & 0x000000ff) >> 000; + b[1] = (x & 0x0000ff00) >> 010; + b[2] = (x & 0x00ff0000) >> 020; + b[3] = (x & 0xff000000) >> 030; EmitData(a, b, 4); } -void EmitQuad(struct As *a, unsigned long x) { +void EmitQuad(struct As *a, uint128_t i) { + uint64_t x = i; unsigned char b[8]; - b[0] = x >> 000; - b[1] = x >> 010; - b[2] = x >> 020; - b[3] = x >> 030; - 
b[4] = x >> 040; - b[5] = x >> 050; - b[6] = x >> 060; - b[7] = x >> 070; + b[0] = (x & 0x00000000000000ff) >> 000; + b[1] = (x & 0x000000000000ff00) >> 010; + b[2] = (x & 0x0000000000ff0000) >> 020; + b[3] = (x & 0x00000000ff000000) >> 030; + b[4] = (x & 0x000000ff00000000) >> 040; + b[5] = (x & 0x0000ff0000000000) >> 050; + b[6] = (x & 0x00ff000000000000) >> 060; + b[7] = (x & 0xff00000000000000) >> 070; EmitData(a, b, 8); } +void EmitOcta(struct As *a, uint128_t i) { + EmitQuad(a, i); + EmitQuad(a, i >> 64); +} + static void EmitVarword(struct As *a, unsigned long x) { if (x > 255) EmitVarword(a, x >> 8); EmitByte(a, x); @@ -1381,7 +1310,7 @@ static void EmitVarword(struct As *a, unsigned long x) { static void OnSleb128(struct As *a, struct Slice s) { int c; - long x; + int128_t x; for (;;) { x = GetInt(a); for (;;) { @@ -1401,7 +1330,7 @@ static void OnSleb128(struct As *a, struct Slice s) { static void OnUleb128(struct As *a, struct Slice s) { int c; - unsigned long x; + uint128_t x; for (;;) { x = GetInt(a); do { @@ -1415,6 +1344,23 @@ static void OnUleb128(struct As *a, struct Slice s) { } } +static void OnZleb128(struct As *a, struct Slice s) { + int c; + uint128_t x; + for (;;) { + x = GetInt(a); + x = (x << 1) ^ ((int128_t)x >> 127); + do { + c = x & 0x7f; + x >>= 7; + if (x) c |= 0x80; + EmitByte(a, c); + } while (x); + if (IsSemicolon(a)) break; + ConsumeComma(a); + } +} + static void OnZero(struct As *a, struct Slice s) { long n; char *p; @@ -1460,7 +1406,7 @@ static long GetRelaAddend(int kind) { } static void EmitExpr(struct As *a, int expr, int kind, - void emitter(struct As *, unsigned long)) { + void emitter(struct As *, uint128_t)) { if (expr == -1) { emitter(a, 0); } else if (a->exprs.p[expr].kind == EX_INT) { @@ -1477,7 +1423,7 @@ static void EmitExpr(struct As *a, int expr, int kind, } static void OpInt(struct As *a, int kind, - void emitter(struct As *, unsigned long)) { + void emitter(struct As *, uint128_t)) { for (;;) { EmitExpr(a, 
Parse(a), kind, emitter); if (IsSemicolon(a)) break; @@ -1501,6 +1447,10 @@ static void OnQuad(struct As *a, struct Slice s) { OpInt(a, R_X86_64_64, EmitQuad); } +static void OnOcta(struct As *a, struct Slice s) { + OpInt(a, R_X86_64_64, EmitOcta); +} + static void OnFloat(struct As *a, struct Slice s) { float f; char b[4]; @@ -1620,7 +1570,7 @@ static void OnPrevious(struct As *a, struct Slice s) { static void OnAlign(struct As *a, struct Slice s) { long i, n, align, fill, maxskip; align = GetInt(a); - if (__builtin_popcountl(align) != 1) Fail(a, "alignment not power of 2"); + if (!IS2POW(align)) Fail(a, "alignment not power of 2"); fill = (a->sections.p[a->section].flags & SHF_EXECINSTR) ? 0x90 : 0; maxskip = 268435456; if (IsComma(a)) { @@ -1910,7 +1860,7 @@ static unsigned long MakeKey64(const char *p, int n) { return READ64BE(k); } -static unsigned __int128 MakeKey128(const char *p, int n) { +static uint128_t MakeKey128(const char *p, int n) { char k[16] = {0}; CopyLower(k, p, n); return READ128BE(k); @@ -2088,7 +2038,7 @@ static void EmitImm(struct As *a, int reg, int imm) { static void EmitModrm(struct As *a, int reg, int modrm, int disp) { int relo, mod, rm; - void (*emitter)(struct As *, unsigned long); + void (*emitter)(struct As *, uint128_t); reg &= 7; reg <<= 3; if (modrm & ISREG) { @@ -2717,12 +2667,14 @@ static noinline void OpJmpImpl(struct As *a, int cc) { if (IsPunct(a, a->i, '*')) ++a->i; modrm = RemoveRexw(ParseModrm(a, &disp)); if (cc == -1) { - if ((modrm & ISRIP) || !(modrm & (HASBASE | HASINDEX))) { - modrm |= ISRIP; - a->pcrelative = R_X86_64_GOTPCRELX; + if (modrm & (ISREG | ISRIP | HASINDEX | HASBASE)) { + if (modrm & ISRIP) a->pcrelative = R_X86_64_GOTPCRELX; + EmitRexOpModrm(a, 0xFF, 4, modrm, disp, 0); + a->pcrelative = 0; + } else { + EmitByte(a, 0xE9); + EmitExpr(a, disp, R_X86_64_PC32, EmitLong); } - EmitRexOpModrm(a, 0xFF, 4, modrm, disp, 0); - a->pcrelative = 0; } else { EmitByte(a, 0x0F); EmitByte(a, 0x80 + cc); @@ -3141,6 
+3093,7 @@ static const struct Directive8 { {".loc", OnLoc}, // {".local", OnLocal}, // {".long", OnLong}, // + {".octa", OnOcta}, // {".quad", OnQuad}, // {".section", OnSection}, // {".short", OnWord}, // @@ -3154,6 +3107,7 @@ static const struct Directive8 { {".weak", OnWeak}, // {".word", OnWord}, // {".zero", OnZero}, // + {".zleb128", OnZleb128}, // {"adc", OnAdc}, // {"adcb", OnAdc}, // {"adcl", OnAdc}, // @@ -4024,7 +3978,7 @@ static void PrintThings(struct As *a) { a->sauces.p[a->things.p[i].s].line); switch (a->things.p[i].t) { case TT_INT: - printf("TT_INT %ld\n", a->ints.p[a->things.p[i].i]); + printf("TT_INT %jd\n", a->ints.p[a->things.p[i].i]); break; case TT_FLOAT: g_xfmt_p(fbuf, &a->floats.p[a->things.p[i].i], 19, sizeof(fbuf), 0); @@ -4038,10 +3992,10 @@ static void PrintThings(struct As *a) { printf("TT_PUNCT %s\n", PunctToStr(a->things.p[i].i, pbuf)); break; case TT_BACKWARD: - printf("TT_BACKWARD %d\n", a->ints.p[a->things.p[i].i]); + printf("TT_BACKWARD %jd\n", a->ints.p[a->things.p[i].i]); break; case TT_FORWARD: - printf("TT_FORWARD %d\n", a->ints.p[a->things.p[i].i]); + printf("TT_FORWARD %jd\n", a->ints.p[a->things.p[i].i]); break; default: abort(); diff --git a/third_party/chibicc/asm.c b/third_party/chibicc/asm.c index 278d33c66..120b60fe8 100644 --- a/third_party/chibicc/asm.c +++ b/third_party/chibicc/asm.c @@ -68,6 +68,7 @@ static void DecodeAsmConstraints(AsmOperand *op) { case 'J': // i∊[0,63] 6 bits for 64-bit shifts case 'N': // i∊[0,255] in/out immediate byte case 'K': // i∊[-128,127] signed byte integer + case 'e': // i∊[-2^31,2^31) for sign-extending case 'Z': // i∊[0,2³²) for zero-extending case 'L': // i∊{0xFF,0xFFFF,0xFFFFFFFF} op->type |= kAsmImm; diff --git a/third_party/chibicc/chibicc.c b/third_party/chibicc/chibicc.c index cf4d69f10..9b64c5ecc 100644 --- a/third_party/chibicc/chibicc.c +++ b/third_party/chibicc/chibicc.c @@ -1,5 +1,8 @@ +#include "libc/calls/calls.h" #include "libc/calls/struct/siginfo.h" #include 
"libc/calls/ucontext.h" +#include "libc/runtime/gc.internal.h" +#include "libc/runtime/runtime.h" #include "libc/x/x.h" #include "third_party/chibicc/chibicc.h" @@ -36,6 +39,7 @@ bool opt_verbose; static bool opt_A; static bool opt_E; static bool opt_J; +static bool opt_P; static bool opt_M; static bool opt_MD; static bool opt_MMD; @@ -60,10 +64,13 @@ static StringArray std_include_paths; char *base_file; static char *output_file; static StringArray input_paths; -static char **tmpfiles; +char **chibicc_tmpfiles; static void usage(int status) { - fprintf(stderr, "chibicc [ -o ] \n"); + char *p; + size_t n; + p = gc(xslurp("/zip/third_party/chibicc/help.txt", &n)); + xwrite(1, p, n); exit(status); } @@ -94,6 +101,7 @@ static void add_default_include_paths(char *argv0) { /* strarray_push(&include_paths, buf); */ // Add standard include paths. /* strarray_push(&include_paths, "."); */ + strarray_push(&include_paths, "/zip/.c"); // Keep a copy of the standard include paths for -MMD option. for (int i = 0; i < include_paths.len; i++) { strarray_push(&std_include_paths, include_paths.data[i]); @@ -157,6 +165,10 @@ static void PrintMemoryUsage(void) { sizeof(Obj) * alloc_obj_count); fprintf(stderr, "allocated %,ld types (%,ld bytes)\n", alloc_type_count, sizeof(Type) * alloc_type_count); + fprintf(stderr, "chibicc hashmap hits %,ld\n", chibicc_hashmap_hits); + fprintf(stderr, "chibicc hashmap miss %,ld\n", chibicc_hashmap_miss); + fprintf(stderr, "as hashmap hits %,ld\n", as_hashmap_hits); + fprintf(stderr, "as hashmap miss %,ld\n", as_hashmap_miss); } static void strarray_push_comma(StringArray *a, char *s) { @@ -177,10 +189,10 @@ static void parse_args(int argc, char **argv) { } } } - StringArray idirafter = {}; + StringArray idirafter = {0}; for (int i = 1; i < argc; i++) { if (!strcmp(argv[i], "-###")) { - opt_hash_hash_hash = true; + opt_verbose = opt_hash_hash_hash = true; } else if (!strcmp(argv[i], "-cc1")) { opt_cc1 = true; } else if (!strcmp(argv[i], "--help")) { 
@@ -202,6 +214,8 @@ static void parse_args(int argc, char **argv) { opt_common = false; } else if (!strcmp(argv[i], "-fno-builtin")) { opt_no_builtin = true; + } else if (!strcmp(argv[i], "-save-temps")) { + opt_save_temps = true; } else if (!strcmp(argv[i], "-c")) { opt_c = true; } else if (!strcmp(argv[i], "-E")) { @@ -210,6 +224,8 @@ static void parse_args(int argc, char **argv) { opt_J = true; } else if (!strcmp(argv[i], "-A")) { opt_A = true; + } else if (!strcmp(argv[i], "-P")) { + opt_P = true; } else if (!strcmp(argv[i], "-I")) { strarray_push(&include_paths, argv[++i]); } else if (startswith(argv[i], "-I")) { @@ -350,9 +366,10 @@ static char *replace_extn(char *tmpl, char *extn) { } static void cleanup(void) { - if (tmpfiles && !opt_save_temps) { - for (int i = 0; tmpfiles[i]; i++) { - unlink(tmpfiles[i]); + size_t i; + if (chibicc_tmpfiles && !opt_save_temps) { + for (i = 0; chibicc_tmpfiles[i]; i++) { + unlink(chibicc_tmpfiles[i]); } } } @@ -363,9 +380,9 @@ static char *create_tmpfile(void) { if (fd == -1) error("mkstemp failed: %s", strerror(errno)); close(fd); static int len = 2; - tmpfiles = realloc(tmpfiles, sizeof(char *) * len); - tmpfiles[len - 2] = path; - tmpfiles[len - 1] = NULL; + chibicc_tmpfiles = realloc(chibicc_tmpfiles, sizeof(char *) * len); + chibicc_tmpfiles[len - 2] = path; + chibicc_tmpfiles[len - 1] = NULL; len++; return path; } @@ -377,12 +394,50 @@ static void handle_exit(bool ok) { } } +static bool NeedsShellQuotes(const char *s) { + if (*s) { + for (;;) { + switch (*s++ & 255) { + case 0: + return false; + case '-': + case '.': + case '/': + case '_': + case '0' ... '9': + case 'A' ... 'Z': + case 'a' ... 'z': + break; + default: + return true; + } + } + } else { + return true; + } +} + static bool run_subprocess(char **argv) { - // If -### is given, dump the subprocess's command line. 
- if (opt_hash_hash_hash) { - fprintf(stderr, "%s", argv[0]); - for (int i = 1; argv[i]; i++) fprintf(stderr, " %s", argv[i]); - fprintf(stderr, "\n"); + int rc, ws; + size_t i, j, n; + if (opt_verbose) { + for (i = 0; argv[i]; i++) { + fputc(' ', stderr); + if (opt_hash_hash_hash && NeedsShellQuotes(argv[i])) { + fputc('\'', stderr); + for (j = 0; argv[i][j]; ++j) { + if (argv[i][j] != '\'') { + fputc(argv[i][j], stderr); + } else { + fputs("'\"'\"'", stderr); + } + } + fputc('\'', stderr); + } else { + fputs(argv[i], stderr); + } + } + fputc('\n', stderr); } if (!vfork()) { // Child process. Run a new command. @@ -390,13 +445,9 @@ static bool run_subprocess(char **argv) { _exit(1); } // Wait for the child process to finish. - int status; - for (;;) { - if (wait(&status) <= 0) { - break; - } - } - return !status; + do rc = wait(&ws); + while (rc == -1 && errno == EINTR); + return WIFEXITED(ws) && WEXITSTATUS(ws) == 0; } static bool run_cc1(int argc, char **argv, char *input, char *output) { @@ -480,7 +531,7 @@ static void print_dependencies(void) { File **files = get_input_files(); for (int i = 0; files[i]; i++) { if (opt_MMD && in_std_include_path(files[i]->name)) continue; - fprintf(out, " \\\n %s", files[i]->name); + fprintf(out, " \\\n\t%s", files[i]->name); } fprintf(out, "\n\n"); if (opt_MP) { @@ -559,6 +610,10 @@ static void cc1(void) { output_javadown(output_file, prog); return; } + if (opt_P) { + output_bindings_python(output_file, prog, tok2); + return; + } FILE *out = open_file(output_file); codegen(prog, out); fclose(out); @@ -573,7 +628,7 @@ static int CountArgv(char **argv) { static void assemble(char *input, char *output) { char *as = getenv("AS"); if (!as || !*as) as = "as"; - StringArray arr = {}; + StringArray arr = {0}; strarray_push(&arr, as); strarray_push(&arr, "-W"); strarray_push(&arr, "-I."); @@ -584,9 +639,11 @@ static void assemble(char *input, char *output) { strarray_push(&arr, input); strarray_push(&arr, "-o"); strarray_push(&arr, 
output); - strarray_push(&arr, NULL); if (1) { + bool kludge = opt_save_temps; + opt_save_temps = true; Assembler(CountArgv(arr.data), arr.data); + opt_save_temps = kludge; } else { handle_exit(run_subprocess(arr.data)); } @@ -595,7 +652,7 @@ static void assemble(char *input, char *output) { static void run_linker(StringArray *inputs, char *output) { char *ld = getenv("LD"); if (!ld || !*ld) ld = "ld"; - StringArray arr = {}; + StringArray arr = {0}; strarray_push(&arr, ld); strarray_push(&arr, "-o"); strarray_push(&arr, output); @@ -618,7 +675,6 @@ static void run_linker(StringArray *inputs, char *output) { for (int i = 0; i < inputs->len; i++) { strarray_push(&arr, inputs->data[i]); } - strarray_push(&arr, NULL); handle_exit(run_subprocess(arr.data)); } @@ -630,9 +686,16 @@ int chibicc(int argc, char **argv) { showcrashreports(); sigaction(SIGINT, &(struct sigaction){.sa_sigaction = OnCtrlC}, NULL); atexit(cleanup); - init_macros(); + for (int i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-cc1")) { + opt_cc1 = true; + break; + } + } + if (opt_cc1) init_macros(); parse_args(argc, argv); if (opt_cc1) { + init_macros_conditional(); add_default_include_paths(argv[0]); cc1(); return 0; @@ -640,8 +703,8 @@ int chibicc(int argc, char **argv) { if (input_paths.len > 1 && opt_o && (opt_c || opt_S | opt_E)) { error("cannot specify '-o' with '-c,' '-S' or '-E' with multiple files"); } - StringArray ld_args = {}; - StringArray dox_args = {}; + StringArray ld_args = {0}; + StringArray dox_args = {0}; for (int i = 0; i < input_paths.len; i++) { char *input = input_paths.data[i]; if (!strncmp(input, "-l", 2)) { @@ -701,6 +764,11 @@ int chibicc(int argc, char **argv) { handle_exit(run_cc1(argc, argv, input, NULL)); continue; } + // Python Bindings + if (opt_P) { + handle_exit(run_cc1(argc, argv, input, opt_o ? 
opt_o : "/dev/stdout")); + continue; + } // Compile if (opt_S) { handle_exit(run_cc1(argc, argv, input, output)); diff --git a/third_party/chibicc/chibicc.h b/third_party/chibicc/chibicc.h index aef946e12..193acdc56 100644 --- a/third_party/chibicc/chibicc.h +++ b/third_party/chibicc/chibicc.h @@ -96,6 +96,7 @@ struct thatispacked Token { int line_no; // Line number int line_delta; // Line number TokenKind kind; // Token kind + uint8_t kw; // Keyword Phash bool at_bol; // True if this token is at beginning of line bool has_space; // True if this token follows a space character char *loc; // Token location @@ -159,6 +160,7 @@ extern HashMap macros; char *search_include_paths(char *); void init_macros(void); +void init_macros_conditional(void); void define_macro(char *, char *); void undef_macro(char *); Token *preprocess(Token *); @@ -448,7 +450,10 @@ struct Type { bool is_unsigned; // unsigned or signed bool is_atomic; // true if _Atomic bool is_const; // const + bool is_restrict; // restrict + bool is_volatile; // volatile bool is_ms_abi; // microsoft abi + bool is_static; // for array parameter pointer Type *origin; // for type compatibility check // Pointer-to or array-of type. We intentionally use the same member // to represent pointer/array duality in C. 
@@ -462,7 +467,7 @@ struct Type { // Declaration Token *name; Token *name_pos; - // Array + // Array or decayed pointer int array_len; int vector_size; // Variable-length array @@ -570,6 +575,9 @@ struct HashMap { int used; }; +extern long chibicc_hashmap_hits; +extern long chibicc_hashmap_miss; + void *hashmap_get(HashMap *, char *); void *hashmap_get2(HashMap *, char *, int); void hashmap_put(HashMap *, char *, void *); @@ -597,6 +605,7 @@ extern bool opt_sse3; extern bool opt_sse4; extern bool opt_verbose; extern char *base_file; +extern char **chibicc_tmpfiles; int chibicc(int, char **); @@ -631,8 +640,16 @@ void drop_dox(const StringArray *, const char *); // as.c // +extern long as_hashmap_hits; +extern long as_hashmap_miss; + void Assembler(int, char **); +// +// pybind.c +// +void output_bindings_python(const char *, Obj *, Token *); + COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ #endif /* COSMOPOLITAN_THIRD_PARTY_CHIBICC_CHIBICC_H_ */ diff --git a/third_party/chibicc/chibicc.mk b/third_party/chibicc/chibicc.mk index 074863c57..1605b20c6 100644 --- a/third_party/chibicc/chibicc.mk +++ b/third_party/chibicc/chibicc.mk @@ -25,6 +25,7 @@ THIRD_PARTY_CHIBICC2_A = o/$(MODE)/third_party/chibicc/chibicc2.a THIRD_PARTY_CHIBICC_A_FILES := $(wildcard third_party/chibicc/*) THIRD_PARTY_CHIBICC_A_HDRS = $(filter %.h,$(THIRD_PARTY_CHIBICC_A_FILES)) THIRD_PARTY_CHIBICC_A_SRCS = $(filter %.c,$(THIRD_PARTY_CHIBICC_A_FILES)) +THIRD_PARTY_CHIBICC_A_INCS = $(filter %.inc,$(THIRD_PARTY_CHIBICC_A_FILES)) THIRD_PARTY_CHIBICC_DEFINES = \ -DCRT=\"$(CRT)\" \ @@ -94,6 +95,7 @@ o/$(MODE)/third_party/chibicc/chibicc.com.dbg: \ $(THIRD_PARTY_CHIBICC_A) \ $(APE) \ $(CRT) \ + o/$(MODE)/third_party/chibicc/help.txt.zip.o \ o/$(MODE)/third_party/chibicc/chibicc.main.o \ $(THIRD_PARTY_CHIBICC_A).pkg @$(APELINK) @@ -102,6 +104,7 @@ o/$(MODE)/third_party/chibicc/chibicc2.com.dbg: \ $(THIRD_PARTY_CHIBICC2_A) \ $(APE) \ $(CRT) \ + 
o/$(MODE)/third_party/chibicc/help.txt.zip.o \ o/$(MODE)/third_party/chibicc/chibicc.main.chibicc.o \ $(THIRD_PARTY_CHIBICC2_A).pkg @$(APELINK) @@ -129,6 +132,7 @@ o/$(MODE)/%.chibicc2.o: %.c o/$(MODE)/third_party/chibicc/chibicc2.com.dbg THIRD_PARTY_CHIBICC_LIBS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x))) THIRD_PARTY_CHIBICC_SRCS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_SRCS)) THIRD_PARTY_CHIBICC_HDRS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_HDRS)) +THIRD_PARTY_CHIBICC_INCS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_INCS)) THIRD_PARTY_CHIBICC_CHECKS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_CHECKS)) THIRD_PARTY_CHIBICC_OBJS = $(foreach x,$(THIRD_PARTY_CHIBICC_ARTIFACTS),$($(x)_OBJS)) $(THIRD_PARTY_CHIBICC_OBJS): $(BUILD_FILES) third_party/chibicc/chibicc.mk diff --git a/third_party/chibicc/codegen.c b/third_party/chibicc/codegen.c index 593652aa4..298975e83 100644 --- a/third_party/chibicc/codegen.c +++ b/third_party/chibicc/codegen.c @@ -22,32 +22,31 @@ void flushln(void) { } static void processln(char *nextline) { +#define LASTEQUAL(S) (lastlen == strlen(S) && !memcmp(lastline, S, lastlen)) + size_t lastlen; if (lastline) { + lastlen = strlen(lastline); // unsophisticated optimization pass to reduce asm noise a little bit - if ((!strcmp(lastline, "\txor\t%eax,%eax") && - !strcmp(nextline, "\tcltq")) || - (!strcmp(lastline, "\tmov\t$0x1,%eax") && - !strcmp(nextline, "\tcltq")) || - (!strcmp(lastline, "\tmovslq\t(%rax),%rax") && - !strcmp(nextline, "\tcltq"))) { + if ((LASTEQUAL("\txor\t%eax,%eax") && !strcmp(nextline, "\tcltq")) || + (LASTEQUAL("\tmov\t$0x1,%eax") && !strcmp(nextline, "\tcltq")) || + (LASTEQUAL("\tmovslq\t(%rax),%rax") && !strcmp(nextline, "\tcltq"))) { free(nextline); - } else if (!strcmp(lastline, "\tmov\t(%rax),%rax") && + } else if (LASTEQUAL("\tmov\t(%rax),%rax") && !strcmp(nextline, "\tpush\t%rax")) { free(lastline); free(nextline); lastline = strdup("\tpush\t(%rax)"); - } else if 
(!strcmp(lastline, "\tmov\t$0x1,%eax") && + } else if (LASTEQUAL("\tmov\t$0x1,%eax") && !strcmp(nextline, "\tpush\t%rax")) { free(lastline); free(nextline); lastline = strdup("\tpush\t$1"); - } else if (!strcmp(lastline, "\tpush\t(%rax)") && + } else if (LASTEQUAL("\tpush\t(%rax)") && !strcmp(nextline, "\tpop\t%rdi")) { free(lastline); free(nextline); lastline = strdup("\tmov\t(%rax),%rdi"); - } else if (!strcmp(lastline, "\tpush\t%rax") && - !strcmp(nextline, "\tpop\t%rdi")) { + } else if (LASTEQUAL("\tpush\t%rax") && !strcmp(nextline, "\tpop\t%rdi")) { free(lastline); free(nextline); lastline = strdup("\tmov\t%rax,%rdi"); @@ -58,6 +57,7 @@ static void processln(char *nextline) { } else { lastline = nextline; } +#undef LASTEQUAL } static void emitlin(char *nextline) { @@ -184,6 +184,8 @@ static void print_align(int align) { } void print_loc(int64_t file, int64_t line) { + // TODO: This is broken if file is different? See gperf codegen. + return; static int64_t lastfile = -1; static int64_t lastline = -1; char *locbuf, *p; @@ -194,6 +196,7 @@ void print_loc(int64_t file, int64_t line) { *p++ = ' '; int64toarray_radix10(line, p); emitlin(locbuf); + free(locbuf); lastfile = file; lastline = line; } @@ -259,6 +262,14 @@ static char *reg_ax(int sz) { UNREACHABLE(); } +static const char *gotpcrel(void) { + if (opt_pic) { + return "@gotpcrel(%rip)"; + } else { + return ""; + } +} + // Compute the absolute address of a given node. // It's an error if a given node does not reside in memory. // asm() wants this to not clobber flags or regs other than rax. 
@@ -1502,11 +1513,10 @@ void gen_expr(Node *node) { load(node->cas_old->ty->base); pop("%rdx"); // new pop("%rdi"); // addr - int sz = node->cas_addr->ty->base->size; - println("\tlock cmpxchg %s,(%%rdi)", reg_dx(sz)); + println("\tlock cmpxchg %s,(%%rdi)", reg_dx(node->ty->size)); emitlin("\tsete\t%cl"); emitlin("\tje\t1f"); - println("\tmov\t%s,(%%r8)", reg_ax(sz)); + println("\tmov\t%s,(%%r8)", reg_ax(node->ty->size)); emitlin("1:"); emitlin("\tmovzbl\t%cl,%eax"); return; @@ -1516,8 +1526,7 @@ void gen_expr(Node *node) { push(); gen_expr(node->rhs); pop("%rdi"); - int sz = node->lhs->ty->base->size; - println("\txchg\t%s,(%%rdi)", reg_ax(sz)); + println("\txchg\t%s,(%%rdi)", reg_ax(node->ty->size)); return; } case ND_FPCLASSIFY: @@ -2314,9 +2323,9 @@ static void emit_function_hook(void) { if (opt_nop_mcount) { print_profiling_nop(); } else if (opt_fentry) { - emitlin("\tcall\t__fentry__@gotpcrel(%rip)"); + println("\tcall\t__fentry__%s", gotpcrel()); } else if (opt_pg) { - emitlin("\tcall\tmcount@gotpcrel(%rip)"); + println("\tcall\tmcount%s", gotpcrel()); } else { print_profiling_nop(); } diff --git a/third_party/chibicc/file.c b/third_party/chibicc/file.c index 0ed48ba26..d4fe8c60a 100644 --- a/third_party/chibicc/file.c +++ b/third_party/chibicc/file.c @@ -45,19 +45,51 @@ char *skip_bom(char *p) { // Replaces \r or \r\n with \n. 
void canonicalize_newline(char *p) { - int i = 0, j = 0; - while (p[i]) { - if (p[i] == '\r' && p[i + 1] == '\n') { - i += 2; - p[j++] = '\n'; - } else if (p[i] == '\r') { - i++; - p[j++] = '\n'; + char *q = p; + for (;;) { +#if defined(__GNUC__) && defined(__x86_64__) && !defined(__chibicc__) // :'( + typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1))); + typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); + if (!((uintptr_t)p & 15)) { + xmm_t v; + unsigned m; + xmm_t z = {0}; + xmm_t s = {'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r', + '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'}; + xmm_t t = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', + '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; + for (;;) { + v = *(const xmm_t *)p; + m = __builtin_ia32_pmovmskb128((v == z) | (v == s) | (v == t)); + if (!m) { + *(xmm_u *)q = v; + p += 16; + q += 16; + } else { + m = bsf(m); + memmove(q, p, m); + p += m; + q += m; + break; + } + } + } +#endif + if (p[0]) { + if (p[0] == '\r' && p[1] == '\n') { + p += 2; + *q++ = '\n'; + } else if (p[0] == '\r') { + p += 1; + *q++ = '\n'; + } else { + *q++ = *p++; + } } else { - p[j++] = p[i++]; + break; } } - p[j] = '\0'; + *q = '\0'; } // Removes backslashes followed by a newline. @@ -68,37 +100,74 @@ void remove_backslash_newline(char *p) { // This counter maintain the number of newlines we have removed. 
int n = 0; bool instring = false; - while (p[i]) { - if (instring) { - if (p[i] == '"' && p[i - 1] != '\\') { - instring = false; - } - } else { - if (p[i] == '"') { - instring = true; - } else if (p[i] == '/' && p[i + 1] == '*') { - p[j++] = p[i++]; - p[j++] = p[i++]; - while (p[i]) { - if (p[i] == '*' && p[i + 1] == '/') { - p[j++] = p[i++]; - p[j++] = p[i++]; - break; - } else { - p[j++] = p[i++]; - } + for (;;) { +#if defined(__GNUC__) && defined(__x86_64__) && !defined(__chibicc__) // :'( + typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1))); + typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); + if (!((uintptr_t)(p + i) & 15)) { + xmm_t v; + unsigned m; + xmm_t A = {0}; + xmm_t B = {'/', '/', '/', '/', '/', '/', '/', '/', + '/', '/', '/', '/', '/', '/', '/', '/'}; + xmm_t C = {'"', '"', '"', '"', '"', '"', '"', '"', + '"', '"', '"', '"', '"', '"', '"', '"'}; + xmm_t D = {'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', + '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'}; + xmm_t E = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', + '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; + for (;;) { + v = *(const xmm_t *)(p + i); + m = __builtin_ia32_pmovmskb128((v == A) | (v == B) | (v == C) | + (v == D) | (v == E)); + if (!m) { + *(xmm_u *)(p + j) = v; + i += 16; + j += 16; + } else { + m = bsf(m); + memmove(p + j, p + i, m); + i += m; + j += m; + break; } - continue; } } - if (p[i] == '\\' && p[i + 1] == '\n') { - i += 2; - n++; - } else if (p[i] == '\n') { - p[j++] = p[i++]; - for (; n > 0; n--) p[j++] = '\n'; +#endif + if (p[i]) { + if (instring) { + if (p[i] == '"' && p[i - 1] != '\\') { + instring = false; + } + } else { + if (p[i] == '"') { + instring = true; + } else if (p[i] == '/' && p[i + 1] == '*') { + p[j++] = p[i++]; + p[j++] = p[i++]; + while (p[i]) { + if (p[i] == '*' && p[i + 1] == '/') { + p[j++] = p[i++]; + p[j++] = p[i++]; + break; + } else { + p[j++] = p[i++]; + } + } + continue; + } + } + if (p[i] == 
'\\' && p[i + 1] == '\n') { + i += 2; + n++; + } else if (p[i] == '\n') { + p[j++] = p[i++]; + for (; n > 0; n--) p[j++] = '\n'; + } else { + p[j++] = p[i++]; + } } else { - p[j++] = p[i++]; + break; } } for (; n > 0; n--) p[j++] = '\n'; diff --git a/third_party/chibicc/hashmap.c b/third_party/chibicc/hashmap.c index 90902ae08..2e8f3512a 100644 --- a/third_party/chibicc/hashmap.c +++ b/third_party/chibicc/hashmap.c @@ -1,13 +1,18 @@ // This is an implementation of the open-addressing hash table. +#include "libc/nexgen32e/crc32.h" #include "third_party/chibicc/chibicc.h" #define INIT_SIZE 16 // initial hash bucket size -#define LOW_WATERMARK 50 // keep usage below 50% after rehashing -#define HIGH_WATERMARK 70 // perform rehash when usage exceeds 70% +#define LOW_WATERMARK 20 // keep usage below 20% after rehashing +#define HIGH_WATERMARK 40 // perform rehash when usage exceeds 40% #define TOMBSTONE ((void *)-1) // represents deleted hash table entry -static uint64_t fnv_hash(char *s, int len) { +long chibicc_hashmap_hits; +long chibicc_hashmap_miss; + +static inline uint64_t fnv_hash(char *s, int len) { + return crc32c(0, s, len); uint64_t hash = 0xcbf29ce484222325; for (int i = 0; i < len; i++) { hash *= 0x100000001b3; @@ -44,8 +49,17 @@ static void rehash(HashMap *map) { } static bool match(HashEntry *ent, char *key, int keylen) { - return ent->key && ent->key != TOMBSTONE && ent->keylen == keylen && - memcmp(ent->key, key, keylen) == 0; + if (ent->key && ent->key != TOMBSTONE) { + if (ent->keylen == keylen && !memcmp(ent->key, key, keylen)) { + ++chibicc_hashmap_hits; + return true; + } else { + ++chibicc_hashmap_miss; + return false; + } + } else { + return false; + } } static HashEntry *get_entry(HashMap *map, char *key, int keylen) { diff --git a/third_party/chibicc/help.txt b/third_party/chibicc/help.txt new file mode 100644 index 000000000..2d5bba64b --- /dev/null +++ b/third_party/chibicc/help.txt @@ -0,0 +1,651 @@ +SYNOPSIS + + chibicc.com [FLAGS] INPUTS
+ +DESCRIPTION + + chibicc - A Small GNU-Style ISO/IEC 9899:2011 Standard Compiler + +OVERVIEW + + chibicc is the simplest, tiniest, most hackable, and most readable C11 compiler in the + world that can compile code quickly and consistently across platforms. + +FLAGS + + -o PATH + + Specifies path of output. + + -c + + Objectify the source file, but do not link. + + -S + + Compile the source file, but do not objectify. + + -E + + Preprocess the source file, but do not compile. + Output defaults to stdout. + + -D TOKEN[=VALUE] + + Defines preprocessor token. + + -U TOKEN + + Undefines preprocessor token. + + -include DIR + + Add include. + + -I DIR + -iquote DIR + -isystem DIR + + Adds include directory. + + -x c (default for .c files) + -x assembler (default for .s files) + -x assembler-with-cpp (default for .S files) + + Explicitly specifies programming language. + + -Wa arg1[,arg2...] + -Xassembler arg1[,arg2...] + + Appends opaque arguments passed along to assembler. + + -Wl arg1[,arg2...] + -Xlinker arg1[,arg2...] + + Appends opaque arguments passed along to linker. + + -v + + Enables verbose mode. + Lines with subprocess commands start with a space. + + -### + + Implies -v and enables shell command argument quoting. + + --version + + Shows compiler version. + + --help + + Shows this information. + + +CODEGEN + + -pg + -mfentry + -mnop-mcount + -mrecord-mcount + + Controls output of profiling hooks in function prologues. + + -fdata-sections + -ffunction-sections + + Controls granularity of code visible to the linker. + + -msse3 + -msse4 + -msse4.1 + -msse4.2 + -mpopcnt + + Specifies microarchitectural features. Default is K8. + + -fpie + -fpic + -fPIC + + Controls output of position independent code. + + -fcommon + -fno-common + + Controls usage of traditional STT_COMMON objects. + + +MAKEFILE + + -M + + Generate makefile header dependency code. + Output defaults to stdout. + + -MD + + Generate makefile header dependency code, and compile.
+ Output defaults to output path with .d extension. + + -MMD + + Generate makefile header dependency code, and compile. + Default include roots are excluded as dependencies. + Output defaults to output path with .d extension. + + -MF PATH + + Specifies output path of header dependency code. + + -MT NAME + + Specifies name of target in generated makefile code. + + -MQ NAME + + Specifies name of target in makefile code with quoting. + + +INTERNALS + + -P + + Generate Python bindings. + + -A + + Print abstract syntax tree. + + -J + + Generate HTML documentation for public APIs based on Javadoc + comments containing Markdown. + + +INTEGRAL TYPES + + _Bool + char + short + int + long + long long + __int128 + signed char + unsigned char + unsigned short + unsigned int + unsigned long + unsigned long long + unsigned __int128 + +FLOATING POINT TYPES + + float + double + long double + + +BUILTIN FUNCTIONS + + T __builtin_expect(T, int) + unsigned long __builtin_offsetof(typename, token) + int __builtin_reg_class(typename) + num __builtin_constant_p(expr) + int __builtin_unreachable() + void * __builtin_frame_address(int) + _Bool __builtin_types_compatible_p(typename, typename) + T __builtin_compare_and_swap(T *addr, T old, T neu) + T __builtin_atomic_exchange(T *addr, T neu) + T * __builtin_assume_aligned(T *addr) + _Bool __builtin_add_overflow(T, T, T *) + _Bool __builtin_sub_overflow(T, T, T *) + _Bool __builtin_mul_overflow(T, T, T *) + _Bool __builtin_neg_overflow(T, T, T *) + void * __builtin_alloca(unsigned long) + void __builtin_trap() + int __builtin_clz(int) + int __builtin_clzl(long) + int __builtin_clzll(long long) + int __builtin_ctz(int) + int __builtin_ctzl(long) + int __builtin_ctzll(long long) + int __builtin_ffs(int) + int __builtin_ffsl(long) + int __builtin_ffsll(long long) + int __builtin_popcount(unsigned int) + long __builtin_popcountl(unsigned long) + long __builtin_popcountll(unsigned long long) + unsigned long __builtin_strlen(char *) + char *
__builtin_strstr(char *, char *) + char * __builtin_strchr(char *, int) + void * __builtin_memcpy(void *, void *, unsigned long) + char * __builtin_strpbrk(char *, char *) + unsigned short __builtin_bswap16(unsigned short) + unsigned int __builtin_bswap32(unsigned int) + unsigned long __builtin_bswap64(unsigned long) + int __builtin_isnan(flonum) + int __builtin_isinf(flonum) + int __builtin_isfinite(flonum) + int __builtin_fpclassify(flonum) + int __builtin_isless(flonum, flonum) + int __builtin_isgreater(flonum, flonum) + int __builtin_isunordered(flonum, flonum) + int __builtin_islessequal(flonum, flonum) + int __builtin_islessgreater(flonum, flonum) + int __builtin_isgreaterequal(flonum, flonum) + double __builtin_nan(char *) + float __builtin_nanf(char *) + long double __builtin_nanl(char *) + long __builtin_signbit(double) + int __builtin_signbitf(float) + int __builtin_signbitl(long double) + double __builtin_huge_val() + float __builtin_huge_valf() + long double __builtin_huge_vall() + double __builtin_fabs(double) + float __builtin_fabsf(float) + long double __builtin_fabsl(long double) + double __builtin_logb(double) + float __builtin_logbf(float) + long double __builtin_logbl(long double) + double __builtin_fmax(double, double) + float __builtin_fmaxf(float, float) + long double __builtin_fmaxl(long double, long double) + double __builtin_fmin(double, double) + float __builtin_fminf(float, float) + long double __builtin_fminl(long double, long double) + double __builtin_copysign(double, double) + float __builtin_copysignf(float, float) + long double __builtin_copysignl(long double, long double) + + +BUILTIN OBJECTS + + __func__ + __FUNCTION__ + __va_area__ + __alloca_size__ + +BUILTIN MACROS + + __FILE__ + __LINE__ + __DATE__ + __TIME__ + __COUNTER__ + __TIMESTAMP__ + __BASE_FILE__ + __chibicc__ + __cosmopolitan__ + __GNUC__ + __GNUC_MINOR__ + __GNUC_PATCHLEVEL__ + __NO_INLINE__ + __GNUC_STDC_INLINE__ + __BIGGEST_ALIGNMENT__ + __C99_MACRO_WITH_VA_ARGS + 
__GCC_ASM_FLAG_OUTPUTS__ + __ELF__ + __LP64__ + _LP64 + __STDC__ + __STDC_HOSTED__ + __STDC_NO_COMPLEX__ + __STDC_UTF_16__ + __STDC_UTF_32__ + __STDC_VERSION__ + __USER_LABEL_PREFIX__ + __alignof__ + __const__ + __inline__ + __signed__ + __typeof__ + __volatile__ + __unix + __unix__ + __linux + __linux__ + __gnu_linux__ + __BYTE_ORDER__ + __FLOAT_WORD_ORDER__ + __ORDER_BIG_ENDIAN__ + __ORDER_LITTLE_ENDIAN__ + __INT8_MAX__ + __UINT8_MAX__ + __INT16_MAX__ + __UINT16_MAX__ + __SHRT_MAX__ + __INT_MAX__ + __INT32_MAX__ + __UINT32_MAX__ + __INT64_MAX__ + __LONG_MAX__ + __LONG_LONG_MAX__ + __UINT64_MAX__ + __SIZE_MAX__ + __INTPTR_MAX__ + __UINTPTR_MAX__ + __WINT_MAX__ + __CHAR_BIT__ + __SIZEOF_SHORT__ + __SIZEOF_INT__ + __SIZEOF_LONG__ + __SIZEOF_LONG_LONG__ + __SIZEOF_POINTER__ + __SIZEOF_PTRDIFF_T__ + __SIZEOF_SIZE_T__ + __SIZEOF_WCHAR_T__ + __SIZEOF_WINT_T__ + __SIZEOF_FLOAT__ + __SIZEOF_FLOAT128__ + __SIZEOF_DOUBLE__ + __SIZEOF_FLOAT80__ + __SIZEOF_LONG_DOUBLE__ + __INT8_TYPE__ + __UINT8_TYPE__ + __INT16_TYPE__ + __UINT16_TYPE__ + __INT32_TYPE__ + __UINT32_TYPE__ + __INT64_TYPE__ + __UINT64_TYPE__ + __INTPTR_TYPE__ + __UINTPTR_TYPE__ + __PTRDIFF_TYPE__ + __SIZE_TYPE__ + __WCHAR_TYPE__ + __CHAR16_TYPE__ + __CHAR32_TYPE__ + __WINT_TYPE__ + __CHAR16_TYPE__ + __WCHAR_TYPE__ + __CHAR32_TYPE__ + __INT_LEAST8_TYPE__ + __UINT_LEAST8_TYPE__ + __INT_LEAST16_TYPE__ + __UINT_LEAST16_TYPE__ + __INT_LEAST32_TYPE__ + __UINT_LEAST32_TYPE__ + __INT_LEAST64_TYPE__ + __UINT_LEAST64_TYPE__ + __INT_FAST8_TYPE__ + __UINT_FAST8_TYPE__ + __INT_FAST16_TYPE__ + __UINT_FAST16_TYPE__ + __INT_FAST32_TYPE__ + __UINT_FAST32_TYPE__ + __INT_FAST64_TYPE__ + __UINT_FAST64_TYPE__ + __DBL_DECIMAL_DIG__ + __DBL_DENORM_MIN__ + __DBL_DIG__ + __DBL_EPSILON__ + __DBL_HAS_DENORM__ + __DBL_HAS_INFINITY__ + __DBL_HAS_QUIET_NAN__ + __DBL_MANT_DIG__ + __DBL_MAX_10_EXP__ + __DBL_MAX_EXP__ + __DBL_MAX__ + __DBL_MIN_10_EXP__ + __DBL_MIN_EXP__ + __DBL_MIN__ + __FLT_DECIMAL_DIG__ + __FLT_DENORM_MIN__ + __FLT_DIG__ + 
__FLT_EPSILON__ + __FLT_EVAL_METHOD_TS_18661_3__ + __FLT_EVAL_METHOD__ + __FLT_HAS_DENORM__ + __FLT_HAS_INFINITY__ + __FLT_HAS_QUIET_NAN__ + __FLT_MANT_DIG__ + __FLT_MAX_10_EXP__ + __FLT_MAX_EXP__ + __FLT_MAX__ + __FLT_MIN_10_EXP__ + __FLT_MIN_EXP__ + __FLT_MIN__ + __FLT_RADIX__ + __LDBL_DECIMAL_DIG__ + __LDBL_DENORM_MIN__ + __LDBL_DIG__ + __LDBL_EPSILON__ + __LDBL_HAS_DENORM__ + __LDBL_HAS_INFINITY__ + __LDBL_HAS_QUIET_NAN__ + __LDBL_MANT_DIG__ + __LDBL_MAX_10_EXP__ + __LDBL_MAX_EXP__ + __LDBL_MAX__ + __LDBL_MIN_10_EXP__ + __LDBL_MIN_EXP__ + __LDBL_MIN__ + __x86_64 + __x86_64__ + __amd64 + __amd64__ + __MMX__ + __SSE__ + __SSE_MATH__ + __SSE2__ + __SSE2_MATH__ + __SSE3__ [conditional] + __SSE4__ [conditional] + __POPCNT__ [conditional] + __PG__ [conditional] + __PIC__ [conditional] + __MFENTRY__ [conditional] + + +ASSEMBLER + + Assembling is normally an implementation detail of your compiler, + which can embed this program or launch it as a subprocess. Much GNU + style syntax is supported. Your code that gets embedded in an asm() + statement will ultimately end up here. This implementation has the + advantage of behaving the same across platforms, in a simple single + file implementation that compiles down to a 100 kB APE executable. + + Your assembler supports the following flags: + + -o FILE output path [default: a.out] + -I DIR append include path [default: .] + -W inhibit .warning + -Z inhibit .error and .err + + Your assembler supports the following directives: + + .zero N emits 0's + .byte INT... emits int8 + .word INT... emits int16 + .long INT... emits int32 + .quad INT... emits int64 + .octa INT... emits int128 + .ascii STR... emits string + .asciz STR... emits string and 0 byte + .ident STR emits string to .comment section + .float NUMBER... emits binary32 + .double NUMBER... emits binary64 + .float80 NUMBER... emits x86 float (10 bytes) + .ldbl NUMBER... emits x86 float (16 bytes) + .zleb128 NUMBER... emits LEB-128 zigzag varint + .sleb128 NUMBER...
emits LEB-128 signed varint + .uleb128 NUMBER... emits LEB-128 unsigned varint + .align BYTES [FILL [MAXSKIP]] emits fill bytes to boundary + .end halts tokenization + .abort crashes assembler + .err aborts (ignorable w/ -Z) + .error STR aborts (ignorable w/ -Z) + .warning STR whines (ignorable w/ -W) + .text enters text section (default) + .data enters data section + .bss enters bss section + .section NAME [SFLG SHT] enters section + .previous enters previous section + .pushsection NAME [SFLG SHT] pushes section + .popsection pops section + .type SYM TYPE sets type of symbol + .size SYM EXPR sets size of symbol + .internal SYM... marks symbol STV_INTERNAL + .hidden SYM... marks symbol STV_HIDDEN + .protected SYM... marks symbol STV_PROTECTED + .globl SYM... marks symbol STB_GLOBAL + .local SYM... marks symbol STB_LOCAL + .weak SYM... marks symbol STB_WEAK + .include FILE assembles file source + .incbin FILE emits file content + .file FILENO PATH dwarf file define + .loc FILENO LINENO dwarf source line + + TYPE can be one of the following: + + - @notype STT_NOTYPE (default) + - @object STT_OBJECT + - @function STT_FUNC + - @common STT_COMMON + - @tls_object STT_TLS + + SHT can be one of the following: + + - @progbits SHT_PROGBITS + - @note SHT_NOTE + - @nobits SHT_NOBITS + - @preinit_array SHT_PREINIT_ARRAY + - @init_array SHT_INIT_ARRAY + - @fini_array SHT_FINI_ARRAY + + SFLG is a string which may have the following characters: + + - a SHF_ALLOC + - w SHF_WRITE + - x SHF_EXECINSTR + - g SHF_GROUP + - M SHF_MERGE + - S SHF_STRINGS + - T SHF_TLS + + PREFIXES + + addr32 cs data16 ds es fs + gs lock rep repe repne repnz + repz rex rex.b rex.r rex.rb rex.rx + rex.rxb rex.w rex.wb rex.wr rex.wrb rex.wrx + rex.wrxb rex.wx rex.wxb rex.x rex.xb ss + + REGISTERS + + 64-bit 32-bit 16-bit lo byte hi byte │ sse mmx │ fpu + ─────── ─────── ─────── ─────── ─────── │ ─────── ─────── │ ─────── + %rax %eax %ax %al %ah │ %xmm0 %mm0 │ %st(0) + %rcx %ecx %cx %cl %ch │ %xmm1 %mm1 │ 
%st(1) + %rdx %edx %dx %dl %dh │ %xmm2 %mm2 │ %st(2) + %rbx %ebx %bx %bl %bh │ %xmm3 %mm3 │ %st(3) + %rsp %esp %sp %spl │ %xmm4 %mm4 │ %st(4) + %rbp %ebp %bp %bpl │ %xmm5 %mm5 │ %st(5) + %rsi %esi %si %sil │ %xmm6 %mm6 │ %st(6) + %rdi %edi %di %dil │ %xmm7 %mm7 │ %st(7) + %r8 %r8d %r8w %r8b │ %xmm8 + %r9 %r9d %r9w %r9b │ %xmm9 + %r10 %r10d %r10w %r10b │ %xmm10 + %r11 %r11d %r11w %r11b │ %xmm11 + %r12 %r12d %r12w %r12b │ %xmm12 + %r13 %r13d %r13w %r13b │ %xmm13 + %r14 %r14d %r14w %r14b │ %xmm14 + %r15 %r15d %r15w %r15b │ %xmm15 + %riz %eiz + + +RICHARD STALLMAN MATH55 ASM() NOTATION + + BEHAVIOR + + =: write-only + +: read/writeable + + SELECTION + + Autonomous + + a: ax/eax/rax + b: bx/ebx/rbx + c: cx/ecx/rcx + d: dx/edx/rdx + S: si/esi/rsi + D: di/edi/rdi + + Algorithmic + + r: pick one of a,b,c,d,D,S,bp,sp,r8-15 registers, referenced as %0,etc. + l: pick one of a,b,c,d,D,S,bp,r8-15 for indexing, referenced as %0,etc. + q: pick one of a,b,c,d,r8-r15 for lo-byte access, e.g. %b0,%w0,%k0,etc. + Q: pick one of a,b,c,d for hi-byte access, e.g. %h0,etc. + U: pick one of a,c,d,D,S,r8-11 (call-clobbered) + R: pick one of a,b,c,d,D,S,bp,sp (all models) + y: pick mmx register + x: pick sse register + X: pick anything + m: memory + o: memory offsetable by an immediate, referenced as %0,2+%0,etc. + p: memory, intended for load/push address and segments (movl %@:%p1, %0) + g: probably shorthand for "rmi" combo + X: allow anything + + Combos + + rm: pick register or memory address (converting immediates) + rmi: pick register or memory address (allowing immediates) + + Immediates + + i: integer literal or compiler/assembler constexpr or linker embedding + e: i∊[-2^31,2^31) for sign-extending immediates + Z: i∊[0,2^32) for zero-extending immediates + I: i∊[0,31] (5 bits for 32-bit shifts) + J: i∊[0,63] (6 bits for 64-bit shifts) + K: i∊[-128,127] + L: permit uncasted char/short literal as zero-extended operand to andl? + M: i∊[0,3] (intended for index scaling, e.g.
"mov\t(?,?,1<<%0),?") + N: i∊[0,255] (for in & out) + M: 2-bit integer constant (shifts for index scaling) + I: 5-bit integer constant (for 32-bit shifts) + J: 6-bit integer constant (for 64-bit shifts) + + Transcendentals + + f: any stack slot + t: top of stack (%st(0)) and possibly converts xmm to ymm + u: second top of stack (%st(1)) + + Specials + + %= generates number unique to each instance + %%REG explicitly-supplied register (used w/ clobbers) + + AUGMENTATION + + %pN print raw + %PN print w/ @plt + %aN print address + %zN print only opcode suffix for operand type + %lN print label without punctuation, e.g. jumps + %cN print immediate w/o punctuation, e.g. lea %c0(%1),%2 + %bN print lo-byte form, e.g. xchgb %b0,%%al (QImode 8-bit) + %hN print hi-byte form, e.g. xchgb %h0,%%ah (QImode 8-bit) + %wN print word form, e.g. xchgw %w0,%%ax (HImode 16-bit) + %kN print dword form, e.g. xchgl %k0,%%eax (SImode 32-bit) + %qN print qword form, e.g. xchgq %q0,%%rax (DImode 64-bit) + %HN access high 8 bytes of SSE register, or +8 displacement + %nN negated literal, e.g. lea %n0(%1),%2 + %VN print register name without %, e.g. call foo%V0 + + EXAMPLE + + static inline void MixAudio(short a[static 8], const short b[static 8]) { + asm("paddsw\t%1,%0" + : "+x"(a) + : "x"(b) + : "memory"); + } diff --git a/third_party/chibicc/kw.c b/third_party/chibicc/kw.c new file mode 100644 index 000000000..e2209a866 --- /dev/null +++ b/third_party/chibicc/kw.c @@ -0,0 +1,33 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. 
│ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "third_party/chibicc/chibicc.h" +#include "third_party/chibicc/kw.h" +#include "third_party/chibicc/kw.inc" + +/** + * Returns KW_xxx code if str[0..len) is in the LookupKw table, or 0 if not. + */ +unsigned char GetKw(const char *str, size_t len) { + const struct KwSlot *slot; + if ((slot = LookupKw(str, len))) { + return slot->code; + } else { + return 0; + } +} diff --git a/third_party/chibicc/kw.gperf b/third_party/chibicc/kw.gperf new file mode 100644 index 000000000..13482408c --- /dev/null +++ b/third_party/chibicc/kw.gperf @@ -0,0 +1,120 @@ +%{ +#include "libc/str/str.h" +#include "third_party/chibicc/kw.h" +%} +%compare-strncmp +%language=ANSI-C +%readonly-tables +%struct-type +%define lookup-function-name LookupKw +struct thatispacked KwSlot { char *name; unsigned char code; }; +%% +if, KW_IF +struct, KW_STRUCT +return, KW_RETURN +case, KW_CASE +static, KW_STATIC +void, KW_VOID +char, KW_CHAR +else, KW_ELSE +for, KW_FOR +do, KW_DO +sizeof, KW_SIZEOF +unsigned, KW_UNSIGNED +long, KW_LONG +while, KW_WHILE +union, KW_UNION +switch, KW_SWITCH +double, KW_DOUBLE +const, KW_CONST +float, KW_FLOAT +short, KW_SHORT +signed, KW_SIGNED +break, KW_BREAK +enum, KW_ENUM +continue, KW_CONTINUE +include, KW_INCLUDE +ifdef, KW_IFDEF +ifndef, KW_IFNDEF +define, KW_DEFINE +defined, KW_DEFINED +asm, KW_ASM +default, KW_DEFAULT +auto, KW_AUTO +register, KW_REGISTER
+__attribute__, KW___ATTRIBUTE__ +_Noreturn, KW__NORETURN +elif, KW_ELIF +endif, KW_ENDIF +error, KW_ERROR +extern, KW_EXTERN +goto, KW_GOTO +include_next, KW_INCLUDE_NEXT +inline, KW_INLINE +int, KW_INT +line, KW_LINE +pragma, KW_PRAGMA +restrict, KW_RESTRICT +strchr, KW_STRCHR +strlen, KW_STRLEN +strpbrk, KW_STRPBRK +strstr, KW_STRSTR +typedef, KW_TYPEDEF +typeof, KW_TYPEOF +undef, KW_UNDEF +volatile, KW_VOLATILE +_Alignas, KW__ALIGNAS +_Alignof, KW__ALIGNOF +_Atomic, KW__ATOMIC +_Bool, KW__BOOL +_Generic, KW__GENERIC +_Static_assert, KW__STATIC_ASSERT +_Thread_local, KW__THREAD_LOCAL +__VA_OPT__, KW___VA_OPT__ +__alignof__, KW___ALIGNOF__ +__asm__, KW___ASM__ +__inline, KW_INLINE +__int128, KW___INT128 +__restrict, KW_RESTRICT +__restrict__, KW_RESTRICT +__thread, KW__THREAD_LOCAL +__typeof, KW_TYPEOF +__builtin_add_overflow, KW___BUILTIN_ADD_OVERFLOW +__builtin_assume_aligned, KW___BUILTIN_ASSUME_ALIGNED +__builtin_atomic_exchange, KW___BUILTIN_ATOMIC_EXCHANGE +__builtin_compare_and_swap, KW___BUILTIN_COMPARE_AND_SWAP +__builtin_constant_p, KW___BUILTIN_CONSTANT_P +__builtin_expect, KW___BUILTIN_EXPECT +__builtin_ffs, KW___BUILTIN_FFS +__builtin_ffsl, KW___BUILTIN_FFSL +__builtin_ffsll, KW___BUILTIN_FFSLL +__builtin_fpclassify, KW___BUILTIN_FPCLASSIFY +__builtin_mul_overflow, KW___BUILTIN_MUL_OVERFLOW +__builtin_neg_overflow, KW___BUILTIN_NEG_OVERFLOW +__builtin_offsetof, KW___BUILTIN_OFFSETOF +__builtin_popcount, KW___BUILTIN_POPCOUNT +__builtin_popcountl, KW___BUILTIN_POPCOUNTL +__builtin_popcountll, KW___BUILTIN_POPCOUNTLL +__builtin_reg_class, KW___BUILTIN_REG_CLASS +__builtin_strchr, KW___BUILTIN_STRCHR +__builtin_strlen, KW___BUILTIN_STRLEN +__builtin_strpbrk, KW___BUILTIN_STRPBRK +__builtin_strstr, KW___BUILTIN_STRSTR +__builtin_sub_overflow, KW___BUILTIN_SUB_OVERFLOW +__builtin_types_compatible_p, KW___BUILTIN_TYPES_COMPATIBLE_P +"(", KW_LP +")", KW_RP +"{", KW_LB +"}", KW_RB +"+", KW_PLUS +"-", KW_MINUS +"&", KW_AMP +"*", KW_STAR +"!", KW_EXCLAIM +"~", 
KW_TILDE +"++", KW_INCREMENT +"--", KW_DECREMENT +"&&", KW_LOGAND +"||", KW_LOGOR +"->", KW_ARROW +".", KW_DOT diff --git a/third_party/chibicc/kw.h b/third_party/chibicc/kw.h new file mode 100644 index 000000000..185d180e0 --- /dev/null +++ b/third_party/chibicc/kw.h @@ -0,0 +1,116 @@ +#ifndef COSMOPOLITAN_THIRD_PARTY_CHIBICC_KW_H_ +#define COSMOPOLITAN_THIRD_PARTY_CHIBICC_KW_H_ + +#define KW_STRUCT 1 /* keyword typename */ +#define KW_STATIC 2 /* keyword typename */ +#define KW_VOID 3 /* keyword typename */ +#define KW_CHAR 4 /* keyword typename */ +#define KW_UNSIGNED 5 /* keyword typename */ +#define KW_LONG 6 /* keyword typename */ +#define KW_UNION 7 /* keyword typename */ +#define KW_DOUBLE 8 /* keyword typename */ +#define KW_CONST 9 /* keyword typename */ +#define KW_FLOAT 10 /* keyword typename */ +#define KW_SHORT 11 /* keyword typename */ +#define KW_SIGNED 12 /* keyword typename */ +#define KW_ENUM 13 /* keyword typename */ +#define KW_AUTO 14 /* keyword typename */ +#define KW_REGISTER 15 /* keyword typename */ +#define KW__NORETURN 16 /* keyword typename */ +#define KW_EXTERN 17 /* keyword typename */ +#define KW_INLINE 18 /* keyword typename */ +#define KW_INT 19 /* keyword typename */ +#define KW_RESTRICT 20 /* keyword typename */ +#define KW_TYPEDEF 21 /* keyword typename */ +#define KW_TYPEOF 22 /* keyword typename */ +#define KW_VOLATILE 23 /* keyword typename */ +#define KW__ALIGNAS 24 /* keyword typename */ +#define KW__ATOMIC 25 /* keyword typename */ +#define KW__BOOL 26 /* keyword typename */ +#define KW__THREAD_LOCAL 27 /* keyword typename */ +#define KW___INT128 28 /* keyword typename */ +#define KW_IF 33 /* keyword */ +#define KW_RETURN 34 /* keyword */ +#define KW_CASE 35 /* keyword */ +#define KW_ELSE 36 /* keyword */ +#define KW_FOR 37 /* keyword */ +#define KW_DO 38 /* keyword */ +#define KW_SIZEOF 39 /* keyword */ +#define KW_WHILE 40 /* keyword */ +#define KW_SWITCH 41 /* keyword */ +#define KW_BREAK 42 /* keyword */ +#define 
KW_CONTINUE 43 /* keyword */ +#define KW_ASM 44 /* keyword */ +#define KW_DEFAULT 45 /* keyword */ +#define KW___ATTRIBUTE__ 46 /* keyword */ +#define KW_GOTO 47 /* keyword */ +#define KW__ALIGNOF 48 /* keyword */ +#define KW_INCLUDE 64 +#define KW_IFDEF 65 +#define KW_IFNDEF 66 +#define KW_DEFINE 67 +#define KW_DEFINED 68 +#define KW_ELIF 69 +#define KW_ENDIF 70 +#define KW_ERROR 71 +#define KW_INCLUDE_NEXT 72 +#define KW_LINE 73 +#define KW_PRAGMA 74 +#define KW_STRCHR 75 +#define KW_STRLEN 76 +#define KW_STRPBRK 77 +#define KW_STRSTR 78 +#define KW_UNDEF 79 +#define KW__GENERIC 80 +#define KW__STATIC_ASSERT 81 +#define KW___VA_OPT__ 82 +#define KW___ALIGNOF__ 83 +#define KW___ASM__ 84 +#define KW___BUILTIN_ADD_OVERFLOW 85 +#define KW___BUILTIN_ASSUME_ALIGNED 86 +#define KW___BUILTIN_ATOMIC_EXCHANGE 87 +#define KW___BUILTIN_COMPARE_AND_SWAP 88 +#define KW___BUILTIN_CONSTANT_P 89 +#define KW___BUILTIN_EXPECT 90 +#define KW___BUILTIN_FFS 91 +#define KW___BUILTIN_FFSL 92 +#define KW___BUILTIN_FFSLL 93 +#define KW___BUILTIN_FPCLASSIFY 94 +#define KW___BUILTIN_MUL_OVERFLOW 95 +#define KW___BUILTIN_NEG_OVERFLOW 96 +#define KW___BUILTIN_OFFSETOF 97 +#define KW___BUILTIN_POPCOUNT 98 +#define KW___BUILTIN_POPCOUNTL 99 +#define KW___BUILTIN_POPCOUNTLL 100 +#define KW___BUILTIN_REG_CLASS 101 +#define KW___BUILTIN_STRCHR 102 +#define KW___BUILTIN_STRLEN 103 +#define KW___BUILTIN_STRPBRK 104 +#define KW___BUILTIN_STRSTR 105 +#define KW___BUILTIN_SUB_OVERFLOW 106 +#define KW___BUILTIN_TYPES_COMPATIBLE_P 107 +#define KW_LP 108 +#define KW_RP 109 +#define KW_LB 110 +#define KW_RB 111 +#define KW_PLUS 112 +#define KW_MINUS 113 +#define KW_AMP 114 +#define KW_STAR 115 +#define KW_EXCLAIM 116 +#define KW_TILDE 117 +#define KW_INCREMENT 118 +#define KW_DECREMENT 119 +#define KW_LOGAND 120 +#define KW_LOGOR 121 +#define KW_ARROW 122 +#define KW_DOT 123 + +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +unsigned char GetKw(const char *, size_t); + +COSMOPOLITAN_C_END_ 
+#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_THIRD_PARTY_CHIBICC_KW_H_ */ diff --git a/third_party/chibicc/kw.inc b/third_party/chibicc/kw.inc new file mode 100644 index 000000000..eeb8bc5c8 --- /dev/null +++ b/third_party/chibicc/kw.inc @@ -0,0 +1,395 @@ +/* ANSI-C code produced by gperf version 3.1 */ +/* Command-line: gperf kw.gperf */ +/* Computed positions: -k'1,4,11,14,$' */ +/* clang-format off */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. 
*/ +#error "gperf generated tables don't work with this execution character set. Please report a bug to ." +#endif + +#line 1 "kw.gperf" + +#include "libc/str/str.h" +#include "third_party/chibicc/kw.h" +#line 10 "kw.gperf" +struct thatispacked KwSlot { char *name; unsigned char code; }; + +#define TOTAL_KEYWORDS 109 +#define MIN_WORD_LENGTH 1 +#define MAX_WORD_LENGTH 28 +#define MIN_HASH_VALUE 1 +#define MAX_HASH_VALUE 211 +/* maximum key range = 211, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash (register const char *str, register size_t len) +{ + static const unsigned char asso_values[] = + { + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 105, 212, 212, 212, 212, 65, 212, + 100, 95, 90, 15, 212, 0, 80, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 5, 212, 212, 212, + 212, 212, 65, 212, 212, 212, 0, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 5, 212, 20, 50, 0, + 5, 15, 0, 25, 40, 90, 60, 0, 20, 15, + 85, 105, 0, 25, 55, 10, 0, 65, 5, 0, + 0, 10, 0, 30, 10, 25, 5, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212, 212, 212, 212, + 212, 212, 212, 212, 212, 212, 212 + }; + register unsigned int hval = 
len; + + switch (hval) + { + default: + hval += asso_values[(unsigned char)str[13]]; + /*FALLTHROUGH*/ + case 13: + case 12: + case 11: + hval += asso_values[(unsigned char)str[10]]; + /*FALLTHROUGH*/ + case 10: + case 9: + case 8: + case 7: + case 6: + case 5: + case 4: + hval += asso_values[(unsigned char)str[3]+1]; + /*FALLTHROUGH*/ + case 3: + case 2: + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval + asso_values[(unsigned char)str[len - 1]]; +} + +static inline const struct thatispacked KwSlot * +LookupKw (register const char *str, register size_t len) +{ + static const struct thatispacked KwSlot wordlist[] = + { + {""}, +#line 110 "kw.gperf" + {"-", KW_MINUS}, +#line 116 "kw.gperf" + {"--", KW_DECREMENT}, + {""}, {""}, +#line 29 "kw.gperf" + {"const", KW_CONST}, +#line 63 "kw.gperf" + {"typeof", KW_TYPEOF}, +#line 62 "kw.gperf" + {"typedef", KW_TYPEDEF}, + {""}, {""}, {""}, +#line 114 "kw.gperf" + {"~", KW_TILDE}, +#line 68 "kw.gperf" + {"_Atomic", KW__ATOMIC}, +#line 81 "kw.gperf" + {"__typeof", KW_TYPEOF}, + {""}, +#line 78 "kw.gperf" + {"__restrict", KW_RESTRICT}, +#line 22 "kw.gperf" + {"sizeof", KW_SIZEOF}, +#line 75 "kw.gperf" + {"__asm__", KW___ASM__}, + {""}, +#line 15 "kw.gperf" + {"case", KW_CASE}, +#line 73 "kw.gperf" + {"__VA_OPT__", KW___VA_OPT__}, +#line 13 "kw.gperf" + {"struct", KW_STRUCT}, +#line 118 "kw.gperf" + {"||", KW_LOGOR}, + {""}, {""}, +#line 31 "kw.gperf" + {"short", KW_SHORT}, +#line 28 "kw.gperf" + {"double", KW_DOUBLE}, +#line 79 "kw.gperf" + {"__restrict__", KW_RESTRICT}, +#line 95 "kw.gperf" + {"__builtin_popcount", KW___BUILTIN_POPCOUNT}, +#line 17 "kw.gperf" + {"void", KW_VOID}, +#line 69 "kw.gperf" + {"_Bool", KW__BOOL}, +#line 109 "kw.gperf" + {"+", KW_PLUS}, +#line 115 "kw.gperf" + {"++", KW_INCREMENT}, +#line 88 "kw.gperf" + {"__builtin_ffs", KW___BUILTIN_FFS}, +#line 19 "kw.gperf" + {"else", KW_ELSE}, +#line 25 "kw.gperf" + {"while", KW_WHILE}, +#line 85 "kw.gperf" + 
{"__builtin_compare_and_swap", KW___BUILTIN_COMPARE_AND_SWAP}, +#line 101 "kw.gperf" + {"__builtin_strpbrk", KW___BUILTIN_STRPBRK}, +#line 41 "kw.gperf" + {"asm", KW_ASM}, +#line 55 "kw.gperf" + {"line", KW_LINE}, +#line 86 "kw.gperf" + {"__builtin_constant_p", KW___BUILTIN_CONSTANT_P}, +#line 74 "kw.gperf" + {"__alignof__", KW___ALIGNOF__}, +#line 60 "kw.gperf" + {"strpbrk", KW_STRPBRK}, + {""}, +#line 47 "kw.gperf" + {"elif", KW_ELIF}, + {""}, {""}, +#line 103 "kw.gperf" + {"__builtin_sub_overflow", KW___BUILTIN_SUB_OVERFLOW}, +#line 72 "kw.gperf" + {"_Thread_local", KW__THREAD_LOCAL}, +#line 96 "kw.gperf" + {"__builtin_popcountl", KW___BUILTIN_POPCOUNTL}, +#line 97 "kw.gperf" + {"__builtin_popcountll", KW___BUILTIN_POPCOUNTLL}, +#line 108 "kw.gperf" + {"}", KW_RB}, +#line 92 "kw.gperf" + {"__builtin_mul_overflow", KW___BUILTIN_MUL_OVERFLOW}, +#line 104 "kw.gperf" + {"__builtin_types_compatible_p", KW___BUILTIN_TYPES_COMPATIBLE_P}, + {""}, +#line 30 "kw.gperf" + {"float", KW_FLOAT}, +#line 87 "kw.gperf" + {"__builtin_expect", KW___BUILTIN_EXPECT}, +#line 82 "kw.gperf" + {"__builtin_add_overflow", KW___BUILTIN_ADD_OVERFLOW}, +#line 20 "kw.gperf" + {"for", KW_FOR}, + {""}, +#line 91 "kw.gperf" + {"__builtin_fpclassify", KW___BUILTIN_FPCLASSIFY}, +#line 107 "kw.gperf" + {"{", KW_LB}, +#line 42 "kw.gperf" + {"default", KW_DEFAULT}, + {""}, +#line 89 "kw.gperf" + {"__builtin_ffsl", KW___BUILTIN_FFSL}, +#line 90 "kw.gperf" + {"__builtin_ffsll", KW___BUILTIN_FFSLL}, +#line 56 "kw.gperf" + {"pragma", KW_PRAGMA}, +#line 119 "kw.gperf" + {"->", KW_ARROW}, + {""}, +#line 18 "kw.gperf" + {"char", KW_CHAR}, +#line 64 "kw.gperf" + {"undef", KW_UNDEF}, +#line 61 "kw.gperf" + {"strstr", KW_STRSTR}, + {""}, +#line 67 "kw.gperf" + {"_Alignof", KW__ALIGNOF}, + {""}, +#line 49 "kw.gperf" + {"error", KW_ERROR}, +#line 58 "kw.gperf" + {"strchr", KW_STRCHR}, +#line 40 "kw.gperf" + {"defined", KW_DEFINED}, +#line 65 "kw.gperf" + {"volatile", KW_VOLATILE}, +#line 71 "kw.gperf" + 
{"_Static_assert", KW__STATIC_ASSERT}, +#line 48 "kw.gperf" + {"endif", KW_ENDIF}, +#line 16 "kw.gperf" + {"static", KW_STATIC}, + {""}, +#line 66 "kw.gperf" + {"_Alignas", KW__ALIGNAS}, + {""}, +#line 84 "kw.gperf" + {"__builtin_atomic_exchange", KW___BUILTIN_ATOMIC_EXCHANGE}, +#line 39 "kw.gperf" + {"define", KW_DEFINE}, + {""}, +#line 35 "kw.gperf" + {"continue", KW_CONTINUE}, +#line 24 "kw.gperf" + {"long", KW_LONG}, + {""}, +#line 99 "kw.gperf" + {"__builtin_strchr", KW___BUILTIN_STRCHR}, +#line 12 "kw.gperf" + {"if", KW_IF}, +#line 54 "kw.gperf" + {"int", KW_INT}, + {""}, +#line 37 "kw.gperf" + {"ifdef", KW_IFDEF}, + {""}, {""}, {""}, +#line 98 "kw.gperf" + {"__builtin_reg_class", KW___BUILTIN_REG_CLASS}, + {""}, +#line 102 "kw.gperf" + {"__builtin_strstr", KW___BUILTIN_STRSTR}, + {""}, +#line 45 "kw.gperf" + {"__attribute__", KW___ATTRIBUTE__}, + {""}, +#line 33 "kw.gperf" + {"break", KW_BREAK}, +#line 50 "kw.gperf" + {"extern", KW_EXTERN}, + {""}, +#line 80 "kw.gperf" + {"__thread", KW__THREAD_LOCAL}, +#line 46 "kw.gperf" + {"_Noreturn", KW__NORETURN}, + {""}, +#line 38 "kw.gperf" + {"ifndef", KW_IFNDEF}, +#line 21 "kw.gperf" + {"do", KW_DO}, + {""}, {""}, {""}, +#line 59 "kw.gperf" + {"strlen", KW_STRLEN}, +#line 52 "kw.gperf" + {"include_next", KW_INCLUDE_NEXT}, +#line 70 "kw.gperf" + {"_Generic", KW__GENERIC}, +#line 34 "kw.gperf" + {"enum", KW_ENUM}, + {""}, +#line 27 "kw.gperf" + {"switch", KW_SWITCH}, +#line 93 "kw.gperf" + {"__builtin_neg_overflow", KW___BUILTIN_NEG_OVERFLOW}, +#line 77 "kw.gperf" + {"__int128", KW___INT128}, +#line 83 "kw.gperf" + {"__builtin_assume_aligned", KW___BUILTIN_ASSUME_ALIGNED}, + {""}, +#line 32 "kw.gperf" + {"signed", KW_SIGNED}, +#line 36 "kw.gperf" + {"include", KW_INCLUDE}, +#line 57 "kw.gperf" + {"restrict", KW_RESTRICT}, +#line 43 "kw.gperf" + {"auto", KW_AUTO}, + {""}, +#line 111 "kw.gperf" + {"&", KW_AMP}, +#line 117 "kw.gperf" + {"&&", KW_LOGAND}, +#line 76 "kw.gperf" + {"__inline", KW_INLINE}, +#line 51 
"kw.gperf" + {"goto", KW_GOTO}, + {""}, {""}, {""}, +#line 23 "kw.gperf" + {"unsigned", KW_UNSIGNED}, + {""}, {""}, +#line 100 "kw.gperf" + {"__builtin_strlen", KW___BUILTIN_STRLEN}, + {""}, +#line 94 "kw.gperf" + {"__builtin_offsetof", KW___BUILTIN_OFFSETOF}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 14 "kw.gperf" + {"return", KW_RETURN}, + {""}, {""}, {""}, +#line 26 "kw.gperf" + {"union", KW_UNION}, + {""}, {""}, {""}, {""}, {""}, +#line 120 "kw.gperf" + {".", KW_DOT}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 53 "kw.gperf" + {"inline", KW_INLINE}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 44 "kw.gperf" + {"register", KW_REGISTER}, + {""}, {""}, +#line 112 "kw.gperf" + {"*", KW_STAR}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 106 "kw.gperf" + {")", KW_RP}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 105 "kw.gperf" + {"(", KW_LP}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 113 "kw.gperf" + {"!", KW_EXCLAIM} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register unsigned int key = hash (str, len); + + if (key <= MAX_HASH_VALUE) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0') + return &wordlist[key]; + } + } + return 0; +} diff --git a/third_party/chibicc/parse.c b/third_party/chibicc/parse.c index 6609ce533..fb6407b4c 100644 --- a/third_party/chibicc/parse.c +++ b/third_party/chibicc/parse.c @@ -16,9 +16,14 @@ // So it is very easy to lookahead arbitrary number of tokens in this // parser. 
+#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" +#include "libc/log/libfatal.internal.h" +#include "libc/mem/mem.h" #include "libc/nexgen32e/ffs.h" #include "libc/testlib/testlib.h" #include "third_party/chibicc/chibicc.h" +#include "third_party/chibicc/kw.h" typedef struct InitDesg InitDesg; typedef struct Initializer Initializer; @@ -217,22 +222,31 @@ static Node *new_num(int64_t val, Token *tok) { return node; } +static Node *new_int(int64_t val, Token *tok) { + Node *node = new_num(val, tok); + node->ty = ty_int; + return node; +} + +static Node *new_bool(int64_t val, Token *tok) { + Node *node = new_num(val, tok); + node->ty = ty_bool; + return node; +} + static Node *new_long(int64_t val, Token *tok) { - Node *node = new_node(ND_NUM, tok); - node->val = val; + Node *node = new_num(val, tok); node->ty = ty_long; return node; } -static Node *new_ulong(long val, Token *tok) { - Node *node = new_node(ND_NUM, tok); - node->val = val; +static Node *new_ulong(int64_t val, Token *tok) { + Node *node = new_num(val, tok); node->ty = ty_ulong; return node; } static Node *new_var_node(Obj *var, Token *tok) { - if (!var) DebugBreak(); CHECK_NOTNULL(var); Node *node = new_node(ND_VAR, tok); node->var = var; @@ -605,8 +619,6 @@ static Token *thing_attributes(Token *tok, void *arg) { error_tok(tok, "unknown function attribute"); } -Token *to; - // declspec = ("void" | "_Bool" | "char" | "short" | "int" | "long" // | "typedef" | "static" | "extern" | "inline" // | "_Thread_local" | "__thread" @@ -646,90 +658,92 @@ static Type *declspec(Token **rest, Token *tok, VarAttr *attr) { UNSIGNED = 1 << 18, INT128 = 1 << 19, }; + unsigned char kw; Type *ty = copy_type(ty_int); int counter = 0; bool is_const = false; bool is_atomic = false; while (is_typename(tok)) { - // Handle storage class specifiers. 
- if (EQUAL(tok, "typedef") || EQUAL(tok, "static") || EQUAL(tok, "extern") || - EQUAL(tok, "inline") || EQUAL(tok, "__inline") || - EQUAL(tok, "_Thread_local") || EQUAL(tok, "__thread")) { - if (!attr) - error_tok(tok, - "storage class specifier is not allowed in this context"); - if (EQUAL(tok, "typedef")) { - attr->is_typedef = true; - } else if (EQUAL(tok, "static")) { - attr->is_static = true; - } else if (EQUAL(tok, "extern")) { - attr->is_extern = true; - } else if (EQUAL(tok, "inline") || EQUAL(tok, "__inline")) { - attr->is_inline = true; - } else { - attr->is_tls = true; - } - if (attr->is_typedef && - attr->is_static + attr->is_extern + attr->is_inline + attr->is_tls > - 1) { - to = tok; - error_tok(tok, "typedef may not be used together with static," - " extern, inline, __thread or _Thread_local"); - } - tok = tok->next; - goto Continue; - } - if (CONSUME(&tok, tok, "_Noreturn")) { - if (attr) attr->is_noreturn = true; - goto Continue; - } - if (CONSUME(&tok, tok, "const")) { - is_const = true; - goto Continue; - } - // These keywords are recognized but ignored. - if (CONSUME(&tok, tok, "volatile") || CONSUME(&tok, tok, "auto") || - CONSUME(&tok, tok, "register") || CONSUME(&tok, tok, "restrict") || - CONSUME(&tok, tok, "__restrict") || - CONSUME(&tok, tok, "__restrict__")) { - goto Continue; - } - if (EQUAL(tok, "_Atomic")) { - tok = tok->next; - if (EQUAL(tok, "(")) { - ty = typename(&tok, tok->next); - tok = skip(tok, ')'); - } - is_atomic = true; - goto Continue; - } - if (EQUAL(tok, "_Alignas")) { - if (!attr) error_tok(tok, "_Alignas is not allowed in this context"); - tok = skip(tok->next, '('); - if (is_typename(tok)) { - attr->align = typename(&tok, tok)->align; - } else { - Token *altok = tok; - attr->align = const_expr(&tok, tok); - if (popcnt(ty->align) != 1) { - error_tok(altok, "_Alignas needs two power"); + if ((kw = GetKw(tok->loc, tok->len))) { + // Handle storage class specifiers. 
+ if (kw == KW_TYPEDEF || kw == KW_STATIC || kw == KW_EXTERN || + kw == KW_INLINE || kw == KW__THREAD_LOCAL) { + if (!attr) + error_tok(tok, + "storage class specifier is not allowed in this context"); + if (kw == KW_TYPEDEF) { + attr->is_typedef = true; + } else if (kw == KW_STATIC) { + attr->is_static = true; + } else if (kw == KW_EXTERN) { + attr->is_extern = true; + } else if (kw == KW_INLINE) { + attr->is_inline = true; + } else { + attr->is_tls = true; } + if (attr->is_typedef && + attr->is_static + attr->is_extern + attr->is_inline + attr->is_tls > + 1) { + error_tok(tok, "typedef may not be used together with static," + " extern, inline, __thread or _Thread_local"); + } + tok = tok->next; + goto Continue; + } + if (kw == KW__NORETURN) { + if (attr) attr->is_noreturn = true; + tok = tok->next; + goto Continue; + } + if (kw == KW_CONST) { + is_const = true; + tok = tok->next; + goto Continue; + } + // These keywords are recognized but ignored. + if (kw == KW_VOLATILE || kw == KW_AUTO || kw == KW_REGISTER || + kw == KW_RESTRICT) { + tok = tok->next; + goto Continue; + } + if (kw == KW__ATOMIC) { + tok = tok->next; + if (EQUAL(tok, "(")) { + ty = typename(&tok, tok->next); + tok = skip(tok, ')'); + } + is_atomic = true; + goto Continue; + } + if (kw == KW__ALIGNAS) { + if (!attr) error_tok(tok, "_Alignas is not allowed in this context"); + tok = skip(tok->next, '('); + if (is_typename(tok)) { + attr->align = typename(&tok, tok)->align; + } else { + Token *altok = tok; + attr->align = const_expr(&tok, tok); + if (popcnt(attr->align) != 1) { + error_tok(altok, "_Alignas needs two power"); + } + } + tok = skip(tok, ')'); + goto Continue; } - tok = skip(tok, ')'); - goto Continue; } // Handle user-defined types.
Type *ty2 = find_typedef(tok); - if (EQUAL(tok, "struct") || EQUAL(tok, "union") || EQUAL(tok, "enum") || - EQUAL(tok, "typeof") || EQUAL(tok, "__typeof") || ty2) { + if (ty2 || kw == KW_STRUCT || kw == KW_UNION || kw == KW_ENUM || + kw == KW_TYPEOF) { if (counter) break; - if (EQUAL(tok, "struct")) { + if (kw == KW_STRUCT) { ty = struct_decl(&tok, tok->next); - } else if (EQUAL(tok, "union")) { + } else if (kw == KW_UNION) { ty = union_decl(&tok, tok->next); - } else if (EQUAL(tok, "enum")) { + } else if (kw == KW_ENUM) { ty = enum_specifier(&tok, tok->next); - } else if (EQUAL(tok, "typeof") || EQUAL(tok, "__typeof")) { + } else if (kw == KW_TYPEOF) { ty = typeof_specifier(&tok, tok->next); } else { ty = ty2; @@ -739,30 +753,31 @@ static Type *declspec(Token **rest, Token *tok, VarAttr *attr) { goto Continue; } // Handle built-in types. - if (EQUAL(tok, "void")) + if (kw == KW_VOID) { counter += VOID; - else if (EQUAL(tok, "_Bool")) + } else if (kw == KW__BOOL) { counter += BOOL; - else if (EQUAL(tok, "char")) + } else if (kw == KW_CHAR) { counter += CHAR; - else if (EQUAL(tok, "short")) + } else if (kw == KW_SHORT) { counter += SHORT; - else if (EQUAL(tok, "int")) + } else if (kw == KW_INT) { counter += INT; - else if (EQUAL(tok, "long")) + } else if (kw == KW_LONG) { counter += LONG; - else if (EQUAL(tok, "__int128")) + } else if (kw == KW___INT128) { counter += INT128; - else if (EQUAL(tok, "float")) + } else if (kw == KW_FLOAT) { counter += FLOAT; - else if (EQUAL(tok, "double")) + } else if (kw == KW_DOUBLE) { counter += DOUBLE; - else if (EQUAL(tok, "signed")) + } else if (kw == KW_SIGNED) { counter |= SIGNED; - else if (EQUAL(tok, "unsigned")) + } else if (kw == KW_UNSIGNED) { counter |= UNSIGNED; - else + } else { UNREACHABLE(); + } switch (counter) { case VOID: ty = copy_type(ty_void); @@ -843,10 +858,10 @@ static Type *declspec(Token **rest, Token *tok, VarAttr *attr) { ty = copy_type(ty); ty->is_atomic = true; } - /* if (attr && is_const) { */ - /* ty 
= copy_type(ty); */ - /* ty->is_const = true; */ - /* } */ + if (is_const) { + ty = copy_type(ty); + ty->is_const = true; + } *rest = tok; return ty; } @@ -895,10 +910,15 @@ static Type *func_params(Token **rest, Token *tok, Type *ty) { new_lvar(strndup(name->loc, name->len), ty2); } if (ty2->kind == TY_ARRAY) { - // "array of T" is converted to "pointer to T" only in the parameter - // context. For example, *argv[] is converted to **argv by this. - ty2 = pointer_to(ty2->base); - ty2->name = name; + // "array of T" decays to "pointer to T" only in the parameter + // context. For example, *argv[] is converted to **argv here. + Type *ty3 = ty2; + ty3 = pointer_to(ty2->base); + ty3->name = name; + ty3->array_len = ty2->array_len; + ty3->is_static = ty2->is_static; + ty3->is_restrict = ty2->is_restrict; + ty2 = ty3; } else if (ty2->kind == TY_FUNC) { // Likewise, a function is converted to a pointer to a function // only in the parameter context. @@ -918,16 +938,32 @@ static Type *func_params(Token **rest, Token *tok, Type *ty) { // array-dimensions = ("static" | "restrict")* const-expr? 
"]" type-suffix static Type *array_dimensions(Token **rest, Token *tok, Type *ty) { - while (EQUAL(tok, "static") || EQUAL(tok, "restrict")) tok = tok->next; + Node *expr; + bool is_static, is_restrict; + is_static = false; + is_restrict = false; + for (;; tok = tok->next) { + if (EQUAL(tok, "static")) { + is_static = true; + } else if (EQUAL(tok, "restrict")) { + is_restrict = true; + } else { + break; + } + } if (EQUAL(tok, "]")) { ty = type_suffix(rest, tok->next, ty); - return array_of(ty, -1); + ty = array_of(ty, -1); + } else { + expr = conditional(&tok, tok); + tok = skip(tok, ']'); + ty = type_suffix(rest, tok, ty); + if (ty->kind == TY_VLA || !is_const_expr(expr)) return vla_of(ty, expr); + ty = array_of(ty, eval(expr)); } - Node *expr = conditional(&tok, tok); - tok = skip(tok, ']'); - ty = type_suffix(rest, tok, ty); - if (ty->kind == TY_VLA || !is_const_expr(expr)) return vla_of(ty, expr); - return array_of(ty, eval(expr)); + ty->is_static = is_static; + ty->is_restrict = is_restrict; + return ty; } // type-suffix = "(" func-params @@ -944,10 +980,21 @@ static Type *type_suffix(Token **rest, Token *tok, Type *ty) { static Type *pointers(Token **rest, Token *tok, Type *ty) { while (CONSUME(&tok, tok, "*")) { ty = pointer_to(ty); - while (EQUAL(tok, "const") || EQUAL(tok, "volatile") || - EQUAL(tok, "restrict") || EQUAL(tok, "__restrict") || - EQUAL(tok, "__restrict__")) - tok = tok->next; + for (;;) { + if (EQUAL(tok, "const")) { + ty->is_const = true; + tok = tok->next; + } else if (EQUAL(tok, "volatile")) { + ty->is_volatile = true; + tok = tok->next; + } else if (EQUAL(tok, "restrict") || EQUAL(tok, "__restrict") || + EQUAL(tok, "__restrict__")) { + ty->is_restrict = true; + tok = tok->next; + } else { + break; + } + } } *rest = tok; return ty; @@ -1668,22 +1715,9 @@ static void gvar_initializer(Token **rest, Token *tok, Obj *var) { // Returns true if a given token represents a type. 
static bool is_typename(Token *tok) { - static HashMap map; - if (map.capacity == 0) { - static char *kw[] = { - "void", "_Bool", "char", "short", "int", - "long", "struct", "union", "typedef", "enum", - "static", "extern", "_Alignas", "signed", "unsigned", - "const", "volatile", "auto", "register", "restrict", - "__restrict", "__restrict__", "_Noreturn", "float", "double", - "typeof", "__typeof", "inline", "__inline", "_Thread_local", - "__thread", "_Atomic", "__int128", - }; - for (int i = 0; i < sizeof(kw) / sizeof(*kw); i++) { - hashmap_put(&map, kw[i], (void *)1); - } - } - return hashmap_get2(&map, tok->loc, tok->len) || find_typedef(tok); + unsigned char kw; + kw = GetKw(tok->loc, tok->len); + return (kw && !(kw & -32)) || find_typedef(tok); } static bool is_const_expr_true(Node *node) { @@ -2286,28 +2320,46 @@ static Node *to_assign(Node *binary) { // | "<<=" | ">>=" static Node *assign(Token **rest, Token *tok) { Node *node = conditional(&tok, tok); - if (EQUAL(tok, "=")) - return new_binary(ND_ASSIGN, node, assign(rest, tok->next), tok); - if (EQUAL(tok, "+=")) - return to_assign(new_add(node, assign(rest, tok->next), tok)); - if (EQUAL(tok, "-=")) - return to_assign(new_sub(node, assign(rest, tok->next), tok)); - if (EQUAL(tok, "*=")) - return to_assign(new_binary(ND_MUL, node, assign(rest, tok->next), tok)); - if (EQUAL(tok, "/=")) - return to_assign(new_binary(ND_DIV, node, assign(rest, tok->next), tok)); - if (EQUAL(tok, "%=")) - return to_assign(new_binary(ND_REM, node, assign(rest, tok->next), tok)); - if (EQUAL(tok, "&=")) - return to_assign(new_binary(ND_BINAND, node, assign(rest, tok->next), tok)); - if (EQUAL(tok, "|=")) - return to_assign(new_binary(ND_BINOR, node, assign(rest, tok->next), tok)); - if (EQUAL(tok, "^=")) - return to_assign(new_binary(ND_BINXOR, node, assign(rest, tok->next), tok)); - if (EQUAL(tok, "<<=")) - return to_assign(new_binary(ND_SHL, node, assign(rest, tok->next), tok)); - if (EQUAL(tok, ">>=")) - return 
to_assign(new_binary(ND_SHR, node, assign(rest, tok->next), tok)); + if (tok->len == 1) { + if (tok->loc[0] == '=') { + return new_binary(ND_ASSIGN, node, assign(rest, tok->next), tok); + } + } else if (tok->len == 2) { + if (tok->loc[0] == '+' && tok->loc[1] == '=') { + return to_assign(new_add(node, assign(rest, tok->next), tok)); + } + if (tok->loc[0] == '-' && tok->loc[1] == '=') { + return to_assign(new_sub(node, assign(rest, tok->next), tok)); + } + if (tok->loc[0] == '*' && tok->loc[1] == '=') { + return to_assign(new_binary(ND_MUL, node, assign(rest, tok->next), tok)); + } + if (tok->loc[0] == '/' && tok->loc[1] == '=') { + return to_assign(new_binary(ND_DIV, node, assign(rest, tok->next), tok)); + } + if (tok->loc[0] == '%' && tok->loc[1] == '=') { + return to_assign(new_binary(ND_REM, node, assign(rest, tok->next), tok)); + } + if (tok->loc[0] == '&' && tok->loc[1] == '=') { + return to_assign( + new_binary(ND_BINAND, node, assign(rest, tok->next), tok)); + } + if (tok->loc[0] == '|' && tok->loc[1] == '=') { + return to_assign( + new_binary(ND_BINOR, node, assign(rest, tok->next), tok)); + } + if (tok->loc[0] == '^' && tok->loc[1] == '=') { + return to_assign( + new_binary(ND_BINXOR, node, assign(rest, tok->next), tok)); + } + } else if (tok->len == 3) { + if (tok->loc[0] == '<' && tok->loc[1] == '<' && tok->loc[2] == '=') { + return to_assign(new_binary(ND_SHL, node, assign(rest, tok->next), tok)); + } + if (tok->loc[0] == '>' && tok->loc[1] == '>' && tok->loc[2] == '=') { + return to_assign(new_binary(ND_SHR, node, assign(rest, tok->next), tok)); + } + } *rest = tok; return node; } @@ -2447,17 +2499,20 @@ static Node *relational(Token **rest, Token *tok) { node = new_binary(ND_LT, node, shift(&tok, tok->next), start); continue; } - if (EQUAL(tok, "<=")) { - node = new_binary(ND_LE, node, shift(&tok, tok->next), start); - continue; - } - if (EQUAL(tok, ">")) { - node = new_binary(ND_LT, shift(&tok, tok->next), node, start); - continue; - } - if 
(EQUAL(tok, ">=")) { - node = new_binary(ND_LE, shift(&tok, tok->next), node, start); - continue; + if (tok->len == 2) { + if (tok->loc[0] == '<' && tok->loc[1] == '=') { + node = new_binary(ND_LE, node, shift(&tok, tok->next), start); + continue; + } + if (tok->loc[0] == '>' && tok->loc[1] == '=') { + node = new_binary(ND_LE, shift(&tok, tok->next), node, start); + continue; + } + } else if (tok->len == 1) { + if (tok->loc[0] == '>') { + node = new_binary(ND_LT, shift(&tok, tok->next), node, start); + continue; + } } *rest = tok; return node; @@ -2469,13 +2524,15 @@ static Node *shift(Token **rest, Token *tok) { Node *node = add(&tok, tok); for (;;) { Token *start = tok; - if (EQUAL(tok, "<<")) { - node = new_binary(ND_SHL, node, add(&tok, tok->next), start); - continue; - } - if (EQUAL(tok, ">>")) { - node = new_binary(ND_SHR, node, add(&tok, tok->next), start); - continue; + if (tok->len == 2) { + if (tok->loc[0] == '<' && tok->loc[1] == '<') { + node = new_binary(ND_SHL, node, add(&tok, tok->next), start); + continue; + } + if (tok->loc[0] == '>' && tok->loc[1] == '>') { + node = new_binary(ND_SHR, node, add(&tok, tok->next), start); + continue; + } } *rest = tok; return node; @@ -2582,13 +2639,15 @@ static Node *add(Token **rest, Token *tok) { Node *node = mul(&tok, tok); for (;;) { Token *start = tok; - if (EQUAL(tok, "+")) { - node = new_add(node, mul(&tok, tok->next), start); - continue; - } - if (EQUAL(tok, "-")) { - node = new_sub(node, mul(&tok, tok->next), start); - continue; + if (tok->len == 1) { + if (tok->loc[0] == '+') { + node = new_add(node, mul(&tok, tok->next), start); + continue; + } + if (tok->loc[0] == '-') { + node = new_sub(node, mul(&tok, tok->next), start); + continue; + } } *rest = tok; return node; @@ -2600,17 +2659,19 @@ static Node *mul(Token **rest, Token *tok) { Node *node = cast(&tok, tok); for (;;) { Token *start = tok; - if (EQUAL(tok, "*")) { - node = new_mul(node, cast(&tok, tok->next), start); - continue; - } - if (EQUAL(tok, 
"/")) { - node = new_binary(ND_DIV, node, cast(&tok, tok->next), start); - continue; - } - if (EQUAL(tok, "%")) { - node = new_binary(ND_REM, node, cast(&tok, tok->next), start); - continue; + if (tok->len == 1) { + if (tok->loc[0] == '*') { + node = new_mul(node, cast(&tok, tok->next), start); + continue; + } + if (tok->loc[0] == '/') { + node = new_binary(ND_DIV, node, cast(&tok, tok->next), start); + continue; + } + if (tok->loc[0] == '%') { + node = new_binary(ND_REM, node, cast(&tok, tok->next), start); + continue; + } } *rest = tok; return node; @@ -2638,47 +2699,60 @@ static Node *cast(Token **rest, Token *tok) { // | "&&" ident // | postfix static Node *unary(Token **rest, Token *tok) { - if (EQUAL(tok, "+")) return cast(rest, tok->next); - if (EQUAL(tok, "-")) return new_unary(ND_NEG, cast(rest, tok->next), tok); - if (EQUAL(tok, "&")) { - Node *lhs = cast(rest, tok->next); - add_type(lhs); - if (lhs->kind == ND_MEMBER && lhs->member->is_bitfield) { - error_tok(tok, "cannot take address of bitfield"); + if (tok->len == 1) { + if (tok->loc[0] == '+') { + return cast(rest, tok->next); + } + if (tok->loc[0] == '-') { + return new_unary(ND_NEG, cast(rest, tok->next), tok); + } + if (tok->loc[0] == '&') { + Node *lhs = cast(rest, tok->next); + add_type(lhs); + if (lhs->kind == ND_MEMBER && lhs->member->is_bitfield) { + error_tok(tok, "cannot take address of bitfield"); + } + return new_unary(ND_ADDR, lhs, tok); + } + if (tok->loc[0] == '*') { + // [https://www.sigbus.info/n1570#6.5.3.2p4] This is an oddity + // in the C spec, but dereferencing a function shouldn't do + // anything. If foo is a function, `*foo`, `**foo` or `*****foo` + // are all equivalent to just `foo`. 
+ Node *node = cast(rest, tok->next); + add_type(node); + if (node->ty->kind == TY_FUNC) return node; + return new_unary(ND_DEREF, node, tok); + } + if (tok->loc[0] == '!') { + return new_unary(ND_NOT, cast(rest, tok->next), tok); + } + if (tok->loc[0] == '~') { + return new_unary(ND_BITNOT, cast(rest, tok->next), tok); + } + } else if (tok->len == 2) { + // Read ++i as i+=1 + if (tok->loc[0] == '+' && tok->loc[1] == '+') { + return to_assign(new_add(unary(rest, tok->next), new_num(1, tok), tok)); + } + // Read --i as i-=1 + if (tok->loc[0] == '-' && tok->loc[1] == '-') { + return to_assign(new_sub(unary(rest, tok->next), new_num(1, tok), tok)); + } + // [GNU] labels-as-values + if (tok->loc[0] == '&' && tok->loc[1] == '&') { + Node *node = new_node(ND_LABEL_VAL, tok); + node->label = get_ident(tok->next); + node->goto_next = gotos; + gotos = node; + *rest = tok->next->next; + return node; } - return new_unary(ND_ADDR, lhs, tok); - } - if (EQUAL(tok, "*")) { - // [https://www.sigbus.info/n1570#6.5.3.2p4] This is an oddity - // in the C spec, but dereferencing a function shouldn't do - // anything. If foo is a function, `*foo`, `**foo` or `*****foo` - // are all equivalent to just `foo`. 
- Node *node = cast(rest, tok->next); - add_type(node); - if (node->ty->kind == TY_FUNC) return node; - return new_unary(ND_DEREF, node, tok); - } - if (EQUAL(tok, "!")) return new_unary(ND_NOT, cast(rest, tok->next), tok); - if (EQUAL(tok, "~")) return new_unary(ND_BITNOT, cast(rest, tok->next), tok); - // Read ++i as i+=1 - if (EQUAL(tok, "++")) - return to_assign(new_add(unary(rest, tok->next), new_num(1, tok), tok)); - // Read --i as i-=1 - if (EQUAL(tok, "--")) - return to_assign(new_sub(unary(rest, tok->next), new_num(1, tok), tok)); - // [GNU] labels-as-values - if (EQUAL(tok, "&&")) { - Node *node = new_node(ND_LABEL_VAL, tok); - node->label = get_ident(tok->next); - node->goto_next = gotos; - gotos = node; - *rest = tok->next->next; - return node; } return postfix(rest, tok); } -// struct-members = (declspec declarator ("," declarator)* ";")* +// struct-members = (declspec declarator ("," declarator)* ";" javadown?)* static void struct_members(Token **rest, Token *tok, Type *ty) { Member head = {}; Member *cur = &head; @@ -2712,6 +2786,9 @@ static void struct_members(Token **rest, Token *tok, Type *ty) { } cur = cur->next = mem; } + if (tok->kind == TK_JAVADOWN) { + tok = tok->next; + } } // If the last element is an array of incomplete type, it's // called a "flexible array member". 
It should behave as if @@ -2904,39 +2981,42 @@ static Node *postfix(Token **rest, Token *tok) { } Node *node = primary(&tok, tok); for (;;) { - if (EQUAL(tok, "(")) { - node = funcall(&tok, tok->next, node); - continue; - } - if (EQUAL(tok, "[")) { - // x[y] is short for *(x+y) - Token *start = tok; - Node *idx = expr(&tok, tok->next); - tok = skip(tok, ']'); - node = new_unary(ND_DEREF, new_add(node, idx, start), start); - continue; - } - if (EQUAL(tok, ".")) { - node = struct_ref(node, tok->next); - tok = tok->next->next; - continue; - } - if (EQUAL(tok, "->")) { - // x->y is short for (*x).y - node = new_unary(ND_DEREF, node, tok); - node = struct_ref(node, tok->next); - tok = tok->next->next; - continue; - } - if (EQUAL(tok, "++")) { - node = new_inc_dec(node, tok, 1); - tok = tok->next; - continue; - } - if (EQUAL(tok, "--")) { - node = new_inc_dec(node, tok, -1); - tok = tok->next; - continue; + if (tok->len == 1) { + if (tok->loc[0] == '(') { + node = funcall(&tok, tok->next, node); + continue; + } + if (tok->loc[0] == '[') { + // x[y] is short for *(x+y) + Token *start = tok; + Node *idx = expr(&tok, tok->next); + tok = skip(tok, ']'); + node = new_unary(ND_DEREF, new_add(node, idx, start), start); + continue; + } + if (tok->loc[0] == '.') { + node = struct_ref(node, tok->next); + tok = tok->next->next; + continue; + } + } else if (tok->len == 2) { + if (tok->loc[0] == '-' && tok->loc[1] == '>') { + // x->y is short for (*x).y + node = new_unary(ND_DEREF, node, tok); + node = struct_ref(node, tok->next); + tok = tok->next->next; + continue; + } + if (tok->loc[0] == '+' && tok->loc[1] == '+') { + node = new_inc_dec(node, tok, 1); + tok = tok->next; + continue; + } + if (tok->loc[0] == '-' && tok->loc[1] == '-') { + node = new_inc_dec(node, tok, -1); + tok = tok->next; + continue; + } } *rest = tok; return node; @@ -3033,71 +3113,73 @@ static Node *generic_selection(Token **rest, Token *tok) { // | str // | num static Node *primary(Token **rest, Token *tok) { 
- Token *start = tok; - if (EQUAL(tok, "(") && EQUAL(tok->next, "{")) { - // This is a GNU statement expresssion. - Node *node = new_node(ND_STMT_EXPR, tok); - node->body = compound_stmt(&tok, tok->next->next)->body; - *rest = skip(tok, ')'); - return node; - } - if (EQUAL(tok, "(")) { - Node *node = expr(&tok, tok->next); - *rest = skip(tok, ')'); - return node; - } - if (EQUAL(tok, "sizeof") && EQUAL(tok->next, "(") && - is_typename(tok->next->next)) { - Type *ty = typename(&tok, tok->next->next); - *rest = skip(tok, ')'); - if (ty->kind == TY_VLA) { - if (ty->vla_size) { - return new_var_node(ty->vla_size, tok); - } - Node *lhs = compute_vla_size(ty, tok); - Node *rhs = new_var_node(ty->vla_size, tok); - return new_binary(ND_COMMA, lhs, rhs, tok); - } - return new_ulong(ty->size, start); - } - if (EQUAL(tok, "sizeof")) { - Node *node = unary(rest, tok->next); - add_type(node); - if (node->ty->kind == TY_VLA) { - return get_vla_size(node->ty, tok); - } - return new_ulong(node->ty->size, tok); - } - if ((EQUAL(tok, "_Alignof") || EQUAL(tok, "__alignof__")) && - EQUAL(tok->next, "(") && is_typename(tok->next->next)) { - Type *ty = typename(&tok, tok->next->next); - *rest = skip(tok, ')'); - return new_ulong(ty->align, tok); - } - if (EQUAL(tok, "_Alignof") || EQUAL(tok, "__alignof__")) { - Node *node = unary(rest, tok->next); - add_type(node); - return new_ulong(node->ty->align, tok); - } - if (EQUAL(tok, "_Generic")) { - return generic_selection(rest, tok->next); - } - if (tok->len > 10 && !memcmp(tok->loc, "__builtin_", 10)) { - if (EQUAL(tok, "__builtin_constant_p")) { - tok = skip(tok->next, '('); - Node *e = expr(&tok, tok); + Token *start; + unsigned char kw; + start = tok; + if ((kw = GetKw(tok->loc, tok->len))) { + if (kw == KW_LP && EQUAL(tok->next, "{")) { + // This is a GNU statement expression.
+ Node *node = new_node(ND_STMT_EXPR, tok); + node->body = compound_stmt(&tok, tok->next->next)->body; *rest = skip(tok, ')'); - return new_num(is_const_expr(e), start); /* DCE */ + return node; } - if (EQUAL(tok, "__builtin_types_compatible_p")) { + if (kw == KW_LP) { + Node *node = expr(&tok, tok->next); + *rest = skip(tok, ')'); + return node; + } + if (kw == KW_SIZEOF && EQUAL(tok->next, "(") && + is_typename(tok->next->next)) { + Type *ty = typename(&tok, tok->next->next); + *rest = skip(tok, ')'); + if (ty->kind == TY_VLA) { + if (ty->vla_size) { + return new_var_node(ty->vla_size, tok); + } + Node *lhs = compute_vla_size(ty, tok); + Node *rhs = new_var_node(ty->vla_size, tok); + return new_binary(ND_COMMA, lhs, rhs, tok); + } + return new_ulong(ty->size, start); + } + if (kw == KW_SIZEOF) { + Node *node = unary(rest, tok->next); + add_type(node); + if (node->ty->kind == TY_VLA) { + return get_vla_size(node->ty, tok); + } + return new_ulong(node->ty->size, tok); + } + if ((kw == KW__ALIGNOF || kw == KW___ALIGNOF__) && EQUAL(tok->next, "(") && + is_typename(tok->next->next)) { + Type *ty = typename(&tok, tok->next->next); + *rest = skip(tok, ')'); + return new_ulong(ty->align, tok); + } + if ((kw == KW__ALIGNOF || kw == KW___ALIGNOF__)) { + Node *node = unary(rest, tok->next); + add_type(node); + return new_ulong(node->ty->align, tok); + } + if (kw == KW__GENERIC) { + return generic_selection(rest, tok->next); + } + if (kw == KW___BUILTIN_CONSTANT_P) { + tok = skip(tok->next, '('); + Node *e = assign(&tok, tok); + *rest = skip(tok, ')'); + return new_bool(is_const_expr(e), start); /* DCE */ + } + if (kw == KW___BUILTIN_TYPES_COMPATIBLE_P) { tok = skip(tok->next, '('); Type *t1 = typename(&tok, tok); tok = skip(tok, ','); Type *t2 = typename(&tok, tok); *rest = skip(tok, ')'); - return new_num(is_compatible(t1, t2), start); + return new_bool(is_compatible(t1, t2), start); } - if (EQUAL(tok, "__builtin_offsetof")) { + if (kw == KW___BUILTIN_OFFSETOF) { tok = 
skip(tok->next, '('); Token *stok = tok; Type *tstruct = typename(&tok, tok); @@ -3111,20 +3193,20 @@ static Node *primary(Token **rest, Token *tok) { for (Member *m = tstruct->members; m; m = m->next) { if (m->name->len == member->len && !memcmp(m->name->loc, member->loc, m->name->len)) { - return new_num(m->offset, start); + return new_ulong(m->offset, start); } } error_tok(member, "no such member"); } - if (EQUAL(tok, "__builtin_reg_class")) { + if (kw == KW___BUILTIN_REG_CLASS) { tok = skip(tok->next, '('); Type *ty = typename(&tok, tok); *rest = skip(tok, ')'); - if (is_integer(ty) || ty->kind == TY_PTR) return new_num(0, start); - if (is_flonum(ty)) return new_num(1, start); - return new_num(2, start); + if (is_integer(ty) || ty->kind == TY_PTR) return new_int(0, start); + if (is_flonum(ty)) return new_int(1, start); + return new_int(2, start); } - if (EQUAL(tok, "__builtin_compare_and_swap")) { + if (kw == KW___BUILTIN_COMPARE_AND_SWAP) { Node *node = new_node(ND_CAS, tok); tok = skip(tok->next, '('); node->cas_addr = assign(&tok, tok); @@ -3133,18 +3215,20 @@ static Node *primary(Token **rest, Token *tok) { tok = skip(tok, ','); node->cas_new = assign(&tok, tok); *rest = skip(tok, ')'); + node->ty = node->cas_addr->ty->base; return node; } - if (EQUAL(tok, "__builtin_atomic_exchange")) { + if (kw == KW___BUILTIN_ATOMIC_EXCHANGE) { Node *node = new_node(ND_EXCH, tok); tok = skip(tok->next, '('); node->lhs = assign(&tok, tok); tok = skip(tok, ','); node->rhs = assign(&tok, tok); + node->ty = node->lhs->ty->base; *rest = skip(tok, ')'); return node; } - if (EQUAL(tok, "__builtin_expect")) { /* do nothing */ + if (kw == KW___BUILTIN_EXPECT) { /* do nothing */ tok = skip(tok->next, '('); Node *node = assign(&tok, tok); tok = skip(tok, ','); @@ -3152,7 +3236,7 @@ static Node *primary(Token **rest, Token *tok) { *rest = skip(tok, ')'); return node; } - if (EQUAL(tok, "__builtin_assume_aligned")) { /* do nothing */ + if (kw == KW___BUILTIN_ASSUME_ALIGNED) { /* do 
nothing */ tok = skip(tok->next, '('); Node *node = assign(&tok, tok); tok = skip(tok, ','); @@ -3163,16 +3247,16 @@ static Node *primary(Token **rest, Token *tok) { *rest = skip(tok, ')'); return node; } - if (EQUAL(tok, "__builtin_add_overflow")) { + if (kw == KW___BUILTIN_ADD_OVERFLOW) { return builtin_overflow(rest, tok, new_add); } - if (EQUAL(tok, "__builtin_sub_overflow")) { + if (kw == KW___BUILTIN_SUB_OVERFLOW) { return builtin_overflow(rest, tok, new_sub); } - if (EQUAL(tok, "__builtin_mul_overflow")) { + if (kw == KW___BUILTIN_MUL_OVERFLOW) { return builtin_overflow(rest, tok, new_mul); } - if (EQUAL(tok, "__builtin_neg_overflow")) { + if (kw == KW___BUILTIN_NEG_OVERFLOW) { Token *start = tok; tok = skip(tok->next, '('); Node *lhs = assign(&tok, tok); @@ -3189,7 +3273,7 @@ static Node *primary(Token **rest, Token *tok) { node->ty = copy_type(ty_bool); return node; } - if (EQUAL(tok, "__builtin_fpclassify")) { + if (kw == KW___BUILTIN_FPCLASSIFY) { Node *node = new_node(ND_FPCLASSIFY, tok); node->fpc = calloc(1, sizeof(FpClassify)); node->ty = ty_int; @@ -3206,7 +3290,7 @@ static Node *primary(Token **rest, Token *tok) { *rest = skip(tok, ')'); return node; } - if (EQUAL(tok, "__builtin_popcount")) { + if (kw == KW___BUILTIN_POPCOUNT) { Token *t = skip(tok->next, '('); Node *node = assign(&t, t); if (is_const_expr(node)) { @@ -3214,8 +3298,7 @@ static Node *primary(Token **rest, Token *tok) { return new_num(__builtin_popcount(eval(node)), t); } } - if (EQUAL(tok, "__builtin_popcountl") || - EQUAL(tok, "__builtin_popcountll")) { + if (kw == KW___BUILTIN_POPCOUNTL || kw == KW___BUILTIN_POPCOUNTLL) { Token *t = skip(tok->next, '('); Node *node = assign(&t, t); if (is_const_expr(node)) { @@ -3223,7 +3306,7 @@ static Node *primary(Token **rest, Token *tok) { return new_num(__builtin_popcountl(eval(node)), t); } } - if (EQUAL(tok, "__builtin_ffs")) { + if (kw == KW___BUILTIN_FFS) { Token *t = skip(tok->next, '('); Node *node = assign(&t, t); if 
(is_const_expr(node)) { @@ -3231,7 +3314,7 @@ static Node *primary(Token **rest, Token *tok) { return new_num(__builtin_ffs(eval(node)), t); } } - if (EQUAL(tok, "__builtin_ffsl") || EQUAL(tok, "__builtin_ffsll")) { + if (kw == KW___BUILTIN_FFSL || kw == KW___BUILTIN_FFSLL) { Token *t = skip(tok->next, '('); Node *node = assign(&t, t); if (is_const_expr(node)) { @@ -3239,62 +3322,58 @@ static Node *primary(Token **rest, Token *tok) { return new_num(__builtin_ffsl(eval(node)), t); } } - } - if ((EQUAL(tok, "__builtin_strlen") || - (!opt_no_builtin && EQUAL(tok, "strlen"))) && - EQUAL(tok->next, "(") && tok->next->next->kind == TK_STR && - EQUAL(tok->next->next->next, ")")) { - *rest = tok->next->next->next->next; - return new_num(strlen(tok->next->next->str), tok); - } - if ((EQUAL(tok, "__builtin_strpbrk") || - (!opt_no_builtin && EQUAL(tok, "strpbrk")))) { - if (EQUAL(tok->next, "(") && tok->next->next->kind == TK_STR && - EQUAL(tok->next->next->next, ",") && - tok->next->next->next->next->kind == TK_STR && - EQUAL(tok->next->next->next->next->next, ")")) { - *rest = tok->next->next->next->next->next->next; - char *res = - strpbrk(tok->next->next->str, tok->next->next->next->next->str); - if (res) { - return new_var_node( - new_string_literal(res, array_of(ty_char, strlen(res) + 1)), tok); - } else { - return new_num(0, tok); + if ((kw == KW___BUILTIN_STRLEN || (!opt_no_builtin && kw == KW_STRLEN)) && + EQUAL(tok->next, "(") && tok->next->next->kind == TK_STR && + EQUAL(tok->next->next->next, ")")) { + *rest = tok->next->next->next->next; + return new_num(strlen(tok->next->next->str), tok); + } + if ((kw == KW___BUILTIN_STRPBRK || (!opt_no_builtin && kw == KW_STRPBRK))) { + if (EQUAL(tok->next, "(") && tok->next->next->kind == TK_STR && + EQUAL(tok->next->next->next, ",") && + tok->next->next->next->next->kind == TK_STR && + EQUAL(tok->next->next->next->next->next, ")")) { + *rest = tok->next->next->next->next->next->next; + char *res = + 
strpbrk(tok->next->next->str, tok->next->next->next->next->str); + if (res) { + return new_var_node( + new_string_literal(res, array_of(ty_char, strlen(res) + 1)), tok); + } else { + return new_num(0, tok); + } } } - } - if ((EQUAL(tok, "__builtin_strstr") || - (!opt_no_builtin && EQUAL(tok, "strstr")))) { - if (EQUAL(tok->next, "(") && tok->next->next->kind == TK_STR && - EQUAL(tok->next->next->next, ",") && - tok->next->next->next->next->kind == TK_STR && - EQUAL(tok->next->next->next->next->next, ")")) { - *rest = tok->next->next->next->next->next->next; - char *res = - strstr(tok->next->next->str, tok->next->next->next->next->str); - if (res) { - return new_var_node( - new_string_literal(res, array_of(ty_char, strlen(res) + 1)), tok); - } else { - return new_num(0, tok); + if (kw == KW___BUILTIN_STRSTR || (!opt_no_builtin && kw == KW_STRSTR)) { + if (EQUAL(tok->next, "(") && tok->next->next->kind == TK_STR && + EQUAL(tok->next->next->next, ",") && + tok->next->next->next->next->kind == TK_STR && + EQUAL(tok->next->next->next->next->next, ")")) { + *rest = tok->next->next->next->next->next->next; + char *res = + strstr(tok->next->next->str, tok->next->next->next->next->str); + if (res) { + return new_var_node( + new_string_literal(res, array_of(ty_char, strlen(res) + 1)), tok); + } else { + return new_num(0, tok); + } } } - } - if ((EQUAL(tok, "__builtin_strchr") || - (!opt_no_builtin && EQUAL(tok, "strchr")))) { - if (EQUAL(tok->next, "(") && tok->next->next->kind == TK_STR && - EQUAL(tok->next->next->next, ",") && - tok->next->next->next->next->kind == TK_NUM && - EQUAL(tok->next->next->next->next->next, ")")) { - *rest = tok->next->next->next->next->next->next; - char *res = - strchr(tok->next->next->str, tok->next->next->next->next->val); - if (res) { - return new_var_node( - new_string_literal(res, array_of(ty_char, strlen(res) + 1)), tok); - } else { - return new_num(0, tok); + if (kw == KW___BUILTIN_STRCHR || (!opt_no_builtin && kw == KW_STRCHR)) { + if 
(EQUAL(tok->next, "(") && tok->next->next->kind == TK_STR && + EQUAL(tok->next->next->next, ",") && + tok->next->next->next->next->kind == TK_NUM && + EQUAL(tok->next->next->next->next->next, ")")) { + *rest = tok->next->next->next->next->next->next; + char *res = + strchr(tok->next->next->str, tok->next->next->next->next->val); + if (res) { + return new_var_node( + new_string_literal(res, array_of(ty_char, strlen(res) + 1)), tok); + } else { + return new_num(0, tok); + } } } } @@ -3302,6 +3381,13 @@ static Node *primary(Token **rest, Token *tok) { // Variable or enum constant VarScope *sc = find_var(tok); *rest = tok->next; +#ifdef IMPLICIT_FUNCTIONS + if (!sc && EQUAL(tok->next, "(")) { + Type *ty = func_type(ty_long); + ty->is_variadic = true; + return new_var_node(new_gvar(strndup(tok->loc, tok->len), ty), tok); + } +#endif // For "static inline" function if (sc && sc->var && sc->var->is_function) { if (current_fn) { @@ -3606,11 +3692,11 @@ void declare_builtin_functions(void) { declare0("trap", ty_int); declare0("unreachable", ty_int); declare1("ctz", ty_int, ty_int); - declare1("ctzl", ty_long, ty_long); - declare1("ctzll", ty_long, ty_long); + declare1("ctzl", ty_int, ty_long); + declare1("ctzll", ty_int, ty_long); declare1("clz", ty_int, ty_int); - declare1("clzl", ty_long, ty_long); - declare1("clzll", ty_long, ty_long); + declare1("clzl", ty_int, ty_long); + declare1("clzll", ty_int, ty_long); declare1("ffs", ty_int, ty_int); declare1("ffsl", ty_int, ty_long); declare1("ffsll", ty_int, ty_long); diff --git a/third_party/chibicc/preprocess.c b/third_party/chibicc/preprocess.c index b70605b7d..8eda785ca 100644 --- a/third_party/chibicc/preprocess.c +++ b/third_party/chibicc/preprocess.c @@ -22,7 +22,11 @@ // standard's wording: // https://github.com/rui314/chibicc/wiki/cpp.algo.pdf +#include "libc/log/libfatal.internal.h" +#include "libc/mem/arena.h" +#include "libc/stdio/stdio.h" #include "third_party/chibicc/chibicc.h" +#include "third_party/chibicc/kw.h" 
typedef struct CondIncl CondIncl; typedef struct Hideset Hideset; @@ -65,8 +69,8 @@ static int include_next_idx; static Token *preprocess2(Token *); static Macro *find_macro(Token *); -static bool is_hash(Token *tok) { - return tok->at_bol && EQUAL(tok, "#"); +static inline bool is_hash(Token *tok) { + return tok->at_bol && tok->len == 1 && tok->loc[0] == '#'; } // Some preprocessor directives such as #include allow extraneous @@ -151,13 +155,17 @@ static Token *append(Token *tok1, Token *tok2) { } static Token *skip_cond_incl2(Token *tok) { + unsigned char kw; while (tok->kind != TK_EOF) { - if (is_hash(tok) && (EQUAL(tok->next, "if") || EQUAL(tok->next, "ifdef") || - EQUAL(tok->next, "ifndef"))) { - tok = skip_cond_incl2(tok->next->next); - continue; + if (is_hash(tok) && (kw = GetKw(tok->next->loc, tok->next->len))) { + if (kw == KW_IF || kw == KW_IFDEF || kw == KW_IFNDEF) { + tok = skip_cond_incl2(tok->next->next); + continue; + } + if (kw == KW_ENDIF) { + return tok->next->next; + } } - if (is_hash(tok) && EQUAL(tok->next, "endif")) return tok->next->next; tok = tok->next; } return tok; @@ -166,15 +174,17 @@ static Token *skip_cond_incl2(Token *tok) { // Skip until next `#else`, `#elif` or `#endif`. // Nested `#if` and `#endif` are skipped. 
static Token *skip_cond_incl(Token *tok) { + unsigned char kw; while (tok->kind != TK_EOF) { - if (is_hash(tok) && (EQUAL(tok->next, "if") || EQUAL(tok->next, "ifdef") || - EQUAL(tok->next, "ifndef"))) { - tok = skip_cond_incl2(tok->next->next); - continue; + if (is_hash(tok) && (kw = GetKw(tok->next->loc, tok->next->len))) { + if (kw == KW_IF || kw == KW_IFDEF || kw == KW_IFNDEF) { + tok = skip_cond_incl2(tok->next->next); + continue; + } + if (kw == KW_ELIF || kw == KW_ELSE || kw == KW_ENDIF) { + break; + } } - if (is_hash(tok) && (EQUAL(tok->next, "elif") || EQUAL(tok->next, "else") || - EQUAL(tok->next, "endif"))) - break; tok = tok->next; } return tok; @@ -210,14 +220,20 @@ static Token *new_str_token(char *str, Token *tmpl) { static Token *copy_line(Token **rest, Token *tok) { Token head = {}; Token *cur = &head; - for (; !tok->at_bol; tok = tok->next) cur = cur->next = copy_token(tok); + for (; !tok->at_bol; tok = tok->next) { + cur = cur->next = copy_token(tok); + } cur->next = new_eof(tok); *rest = tok; return head.next; } static Token *new_num_token(int val, Token *tmpl) { - char *buf = xasprintf("%d\n", val); + char *p, *buf; + p = buf = malloc(13); + p = FormatInt32(p, val); + p[0] = '\n'; + p[1] = 0; return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, buf)); } @@ -248,6 +264,7 @@ static Token *read_const_expr(Token **rest, Token *tok) { // Read and evaluate a constant expression. 
static long eval_const_expr(Token **rest, Token *tok) { + __arena_push(); Token *start = tok; Token *expr = read_const_expr(rest, tok->next); expr = preprocess2(expr); @@ -268,6 +285,7 @@ static long eval_const_expr(Token **rest, Token *tok) { Token *rest2; long val = const_expr(&rest2, expr); if (rest2->kind != TK_EOF) error_tok(rest2, "extra token"); + __arena_pop(); return val; } @@ -327,7 +345,7 @@ static Macro *read_macro_definition(Token **rest, Token *tok) { if (tok->kind != TK_IDENT) error_tok(tok, "macro name must be an identifier"); name = strndup(tok->loc, tok->len); tok = tok->next; - if (!tok->has_space && EQUAL(tok, "(")) { + if (!tok->has_space && tok->len == 1 && tok->loc[0] == '(') { // Function-like macro char *va_args_name = NULL; MacroParam *params = read_macro_params(&tok, tok->next, &va_args_name); @@ -346,13 +364,18 @@ static MacroArg *read_macro_arg_one(Token **rest, Token *tok, bool read_rest) { Token *cur = &head; int level = 0; for (;;) { - if (level == 0 && EQUAL(tok, ")")) break; - if (level == 0 && !read_rest && EQUAL(tok, ",")) break; + if (level == 0 && tok->len == 1 && tok->loc[0] == ')') { + break; + } + if (level == 0 && !read_rest && tok->len == 1 && tok->loc[0] == ',') { + break; + } if (tok->kind == TK_EOF) error_tok(tok, "premature end of input"); - if (EQUAL(tok, "(")) + if (tok->len == 1 && tok->loc[0] == '(') { level++; - else if (EQUAL(tok, ")")) + } else if (tok->len == 1 && tok->loc[0] == ')') { level--; + } cur = cur->next = copy_token(tok); tok = tok->next; } @@ -377,7 +400,7 @@ static MacroArg *read_macro_args(Token **rest, Token *tok, MacroParam *params, } if (va_args_name) { MacroArg *arg; - if (EQUAL(tok, ")")) { + if (tok->len == 1 && tok->loc[0] == ')') { arg = calloc(1, sizeof(MacroArg)); arg->tok = new_eof(tok); } else { @@ -461,10 +484,11 @@ static Token *subst(Token *tok, MacroArg *args) { Token *cur = &head; while (tok->kind != TK_EOF) { // "#" followed by a parameter is replaced with stringized actuals. 
- if (EQUAL(tok, "#")) { + if (tok->len == 1 && tok->loc[0] == '#') { MacroArg *arg = find_arg(args, tok->next); - if (!arg) + if (!arg) { error_tok(tok->next, "'#' is not followed by a macro parameter"); + } cur = cur->next = stringize(tok, arg->tok); tok = tok->next->next; continue; @@ -472,7 +496,9 @@ static Token *subst(Token *tok, MacroArg *args) { // [GNU] If __VA_ARG__ is empty, `,##__VA_ARGS__` is expanded // to the empty token list. Otherwise, its expaned to `,` and // __VA_ARGS__. - if (EQUAL(tok, ",") && EQUAL(tok->next, "##")) { + if (tok->len == 1 && tok->loc[0] == ',' && + (tok->next->len == 2 && + (tok->next->loc[0] == '#' && tok->next->loc[1] == '#'))) { MacroArg *arg = find_arg(args, tok->next->next); if (arg && arg->is_va_args) { if (arg->tok->kind == TK_EOF) { @@ -484,7 +510,7 @@ static Token *subst(Token *tok, MacroArg *args) { continue; } } - if (EQUAL(tok, "##")) { + if (tok->len == 2 && tok->loc[0] == '#' && tok->loc[1] == '#') { if (cur == &head) error_tok(tok, "'##' cannot appear at start of macro expansion"); if (tok->next->kind == TK_EOF) @@ -504,7 +530,8 @@ static Token *subst(Token *tok, MacroArg *args) { continue; } MacroArg *arg = find_arg(args, tok); - if (arg && EQUAL(tok->next, "##")) { + if (arg && (tok->next->len == 2 && + (tok->next->loc[0] == '#' && tok->next->loc[1] == '#'))) { Token *rhs = tok->next->next; if (arg->tok->kind == TK_EOF) { MacroArg *arg2 = find_arg(args, rhs); @@ -577,7 +604,7 @@ static bool expand_macro(Token **rest, Token *tok) { } // If a funclike macro token is not followed by an argument list, // treat it as a normal identifier. 
- if (!EQUAL(tok->next, "(")) return false; + if (!(tok->next->len == 1 && tok->next->loc[0] == '(')) return false; // Function-like macro application Token *macro_token = tok; MacroArg *args = read_macro_args(&tok, tok, m->params, m->va_args_name); @@ -709,6 +736,7 @@ static Token *include_file(Token *tok, char *path, Token *filename_tok) { // Read #line arguments static void read_line_marker(Token **rest, Token *tok) { + // TODO: This is broken if file is different? See gperf codegen. Token *start = tok; tok = preprocess(copy_line(rest, tok)); if (tok->kind != TK_NUM || tok->ty->kind != TY_INT) @@ -723,6 +751,7 @@ static void read_line_marker(Token **rest, Token *tok) { // Visit all tokens in `tok` while evaluating preprocessing // macros and directives. static Token *preprocess2(Token *tok) { + unsigned char kw; Token head = {}; Token *cur = &head; while (tok->kind != TK_EOF) { @@ -744,104 +773,111 @@ static Token *preprocess2(Token *tok) { } Token *start = tok; tok = tok->next; - if (EQUAL(tok, "include")) { - bool is_dquote; - char *filename = read_include_filename(&tok, tok->next, &is_dquote); - if (filename[0] != '/' && is_dquote) { - char *path = - xasprintf("%s/%s", dirname(strdup(start->file->name)), filename); - if (fileexists(path)) { - tok = include_file(tok, path, start->next->next); - continue; + if ((kw = GetKw(tok->loc, tok->len))) { + if (kw == KW_INCLUDE) { + bool is_dquote; + char *filename = read_include_filename(&tok, tok->next, &is_dquote); + if (filename[0] != '/' && is_dquote) { + char *tmp = strdup(start->file->name); + char *path = xasprintf("%s/%s", dirname(tmp), filename); + free(tmp); + bool exists = fileexists(path); + free(path); + if (exists) { + tok = include_file(tok, path, start->next->next); + continue; + } } + char *path = search_include_paths(filename); + tok = include_file(tok, path ? 
path : filename, start->next->next); + continue; + } + if (kw == KW_INCLUDE_NEXT) { + bool ignore; + char *filename = read_include_filename(&tok, tok->next, &ignore); + char *path = search_include_next(filename); + tok = include_file(tok, path ? path : filename, start->next->next); + continue; + } + if (kw == KW_DEFINE) { + read_macro_definition(&tok, tok->next); + continue; + } + if (kw == KW_UNDEF) { + tok = tok->next; + if (tok->kind != TK_IDENT) + error_tok(tok, "macro name must be an identifier"); + undef_macro(strndup(tok->loc, tok->len)); + tok = skip_line(tok->next); + continue; + } + if (kw == KW_IF) { + long val = eval_const_expr(&tok, tok); + push_cond_incl(start, val); + if (!val) tok = skip_cond_incl(tok); + continue; + } + if (kw == KW_IFDEF) { + bool defined = find_macro(tok->next); + push_cond_incl(tok, defined); + tok = skip_line(tok->next->next); + if (!defined) tok = skip_cond_incl(tok); + continue; + } + if (kw == KW_IFNDEF) { + bool defined = find_macro(tok->next); + push_cond_incl(tok, !defined); + tok = skip_line(tok->next->next); + if (defined) tok = skip_cond_incl(tok); + continue; + } + if (kw == KW_ELIF) { + if (!cond_incl || cond_incl->ctx == IN_ELSE) + error_tok(start, "stray #elif"); + cond_incl->ctx = IN_ELIF; + if (!cond_incl->included && eval_const_expr(&tok, tok)) + cond_incl->included = true; + else + tok = skip_cond_incl(tok); + continue; + } + if (kw == KW_ELSE) { + if (!cond_incl || cond_incl->ctx == IN_ELSE) + error_tok(start, "stray #else"); + cond_incl->ctx = IN_ELSE; + tok = skip_line(tok->next); + if (cond_incl->included) tok = skip_cond_incl(tok); + continue; + } + if (kw == KW_ENDIF) { + if (!cond_incl) error_tok(start, "stray #endif"); + cond_incl = cond_incl->next; + tok = skip_line(tok->next); + continue; + } + if (kw == KW_LINE) { + read_line_marker(&tok, tok->next); + continue; } - char *path = search_include_paths(filename); - tok = include_file(tok, path ? 
path : filename, start->next->next); - continue; - } - if (EQUAL(tok, "include_next")) { - bool ignore; - char *filename = read_include_filename(&tok, tok->next, &ignore); - char *path = search_include_next(filename); - tok = include_file(tok, path ? path : filename, start->next->next); - continue; - } - if (EQUAL(tok, "define")) { - read_macro_definition(&tok, tok->next); - continue; - } - if (EQUAL(tok, "undef")) { - tok = tok->next; - if (tok->kind != TK_IDENT) - error_tok(tok, "macro name must be an identifier"); - undef_macro(strndup(tok->loc, tok->len)); - tok = skip_line(tok->next); - continue; - } - if (EQUAL(tok, "if")) { - long val = eval_const_expr(&tok, tok); - push_cond_incl(start, val); - if (!val) tok = skip_cond_incl(tok); - continue; - } - if (EQUAL(tok, "ifdef")) { - bool defined = find_macro(tok->next); - push_cond_incl(tok, defined); - tok = skip_line(tok->next->next); - if (!defined) tok = skip_cond_incl(tok); - continue; - } - if (EQUAL(tok, "ifndef")) { - bool defined = find_macro(tok->next); - push_cond_incl(tok, !defined); - tok = skip_line(tok->next->next); - if (defined) tok = skip_cond_incl(tok); - continue; - } - if (EQUAL(tok, "elif")) { - if (!cond_incl || cond_incl->ctx == IN_ELSE) - error_tok(start, "stray #elif"); - cond_incl->ctx = IN_ELIF; - if (!cond_incl->included && eval_const_expr(&tok, tok)) - cond_incl->included = true; - else - tok = skip_cond_incl(tok); - continue; - } - if (EQUAL(tok, "else")) { - if (!cond_incl || cond_incl->ctx == IN_ELSE) - error_tok(start, "stray #else"); - cond_incl->ctx = IN_ELSE; - tok = skip_line(tok->next); - if (cond_incl->included) tok = skip_cond_incl(tok); - continue; - } - if (EQUAL(tok, "endif")) { - if (!cond_incl) error_tok(start, "stray #endif"); - cond_incl = cond_incl->next; - tok = skip_line(tok->next); - continue; - } - if (EQUAL(tok, "line")) { - read_line_marker(&tok, tok->next); - continue; } if (tok->kind == TK_PP_NUM) { read_line_marker(&tok, tok); continue; } - if (EQUAL(tok, 
"pragma") && EQUAL(tok->next, "once")) { + if (kw == KW_PRAGMA && EQUAL(tok->next, "once")) { hashmap_put(&pragma_once, tok->file->name, (void *)1); tok = skip_line(tok->next->next); continue; } - if (EQUAL(tok, "pragma")) { + if (kw == KW_PRAGMA) { do { tok = tok->next; } while (!tok->at_bol); continue; } - if (EQUAL(tok, "error")) error_tok(tok, "error"); + if (kw == KW_ERROR) { + error_tok(tok, "error"); + } // `#`-only line is legal. It's called a null directive. if (tok->at_bol) continue; error_tok(tok, "invalid preprocessor directive"); @@ -901,11 +937,7 @@ static Token *base_file_macro(Token *tmpl) { // __DATE__ is expanded to the current date, e.g. "May 17 2020". static char *format_date(struct tm *tm) { - _Alignas(char) static char mon[][4] = { - "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", - }; - return xasprintf("\"%s %2d %d\"", mon[tm->tm_mon], tm->tm_mday, + return xasprintf("\"%s %2d %d\"", kMonthNameShort[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900); } @@ -914,6 +946,15 @@ static char *format_time(struct tm *tm) { return xasprintf("\"%02d:%02d:%02d\"", tm->tm_hour, tm->tm_min, tm->tm_sec); } +void init_macros_conditional(void) { + if (opt_pg) define_macro("__PG__", "1"); + if (opt_pic) define_macro("__PIC__", "1"); + if (opt_sse3) define_macro("__SSE3__", "1"); + if (opt_sse4) define_macro("__SSE4__", "1"); + if (opt_popcnt) define_macro("__POPCNT__", "1"); + if (opt_fentry) define_macro("__MFENTRY__", "1"); +} + void init_macros(void) { char *val, *name = "\ __chibicc__\000\ @@ -1232,9 +1273,6 @@ __SSE2_MATH__\000\ define_macro(name, val); name = val + strlen(val) + 1; } while (*name); -#ifdef __SSE3__ - define_macro("__SSE3__", "1"); -#endif add_builtin("__FILE__", file_macro); add_builtin("__LINE__", line_macro); add_builtin("__COUNTER__", counter_macro); @@ -1306,8 +1344,10 @@ static void join_adjacent_string_literals(Token *tok) { len = len + t->ty->array_len - 1; } char *buf = 
calloc(tok1->ty->base->size, len); + int j = 0; int i = 0; for (Token *t = tok1; t != tok2; t = t->next) { + ++j; memcpy(buf + i, t->str, t->ty->size); i = i + t->ty->size - t->ty->base->size; } diff --git a/third_party/chibicc/printast.c b/third_party/chibicc/printast.c index a8dcdd7ca..4d17b8b44 100644 --- a/third_party/chibicc/printast.c +++ b/third_party/chibicc/printast.c @@ -130,6 +130,8 @@ static void PrintType(FILE *f, int l, const char *s, Type *t) { PrintBool(f, l + 2, "is_flexible: ", t->is_flexible); PrintBool(f, l + 2, "is_packed: ", t->is_packed); PrintBool(f, l + 2, "is_aligned: ", t->is_aligned); + PrintBool(f, l + 2, "is_const: ", t->is_const); + PrintBool(f, l + 2, "is_static: ", t->is_static); PrintType(f, l + 2, "return_ty: ", t->return_ty); PrintType(f, l + 2, "params: ", t->params); PrintBool(f, l + 2, "is_variadic: ", t->is_variadic); diff --git a/third_party/chibicc/pybind.c b/third_party/chibicc/pybind.c new file mode 100644 index 000000000..2b8bf360c --- /dev/null +++ b/third_party/chibicc/pybind.c @@ -0,0 +1,547 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/bits/bits.h" +#include "libc/fmt/conv.h" +#include "libc/log/libfatal.internal.h" +#include "libc/mem/mem.h" +#include "libc/runtime/gc.internal.h" +#include "libc/stdio/append.internal.h" +#include "libc/str/str.h" +#include "libc/x/x.h" +#include "third_party/chibicc/chibicc.h" + +static void AppendStringLiteral(char **b, const char *s, const char *indent) { + int c, w, l, o; + for (o = l = 0;; l = c) { + switch ((c = *s++ & 255)) { + case 0: + return; + case '\r': + continue; + case '\t': + w = READ16LE("\\t"); + break; + case '\n': + w = READ32LE("\\n\\\n"); + break; + case '"': + w = READ16LE("\\\""); + break; + case '\\': + w = READ16LE("\\\\"); + break; + case '`': + /* convert markdown to restructured text */ + if (o) { + o = 0; + w = READ16LE("''"); + } else if (*s == '`') { + w = '`'; + ++s; + } else { + o = 1; + w = READ16LE("``"); + } + break; + default: + if ((0x00 <= c && c <= 0x1F) || c == 0x7F || (c == '?' 
&& l == '?')) { + w = '\\'; + w |= ('0' + ((c & 0300) >> 6)) << 010; + w |= ('0' + ((c & 0070) >> 3)) << 020; + w |= ('0' + ((c & 0007) >> 0)) << 030; + } else { + w = c; + } + break; + } + appendw(b, w); + if (c == '\n' && indent) { + appends(b, indent); + } + } +} + +static void AppendJavadown(char **b, const struct Javadown *j) { + size_t i; + const char *s, *s2; + if (j->title && *j->title) { + AppendStringLiteral(b, j->title, 0); + if (j->text && *j->text) { + appendw(b, READ32LE("\\n\\\n")); + appendw(b, READ32LE("\\n\\\n")); + } + } + if (j->text && *j->text) { + AppendStringLiteral(b, j->text, 0); + } + if (j->tags.n) { + appendw(b, READ32LE("\\n\\\n")); + for (i = 0; i < j->tags.n; ++i) { + appendw(b, READ64LE("\\n\\\n:\0\0")); + AppendStringLiteral(b, j->tags.p[i].tag, 0); + s = j->tags.p[i].text; + if (!strcmp(j->tags.p[i].tag, "param") && s && (s2 = strchr(s, ' '))) { + appendw(b, ' '); + appendd(b, s, s2 - s); + s = s2 + 1; + } + appendw(b, ':'); + if (s && *s) { + appendw(b, ' '); + AppendStringLiteral(b, s, " "); + } + } + } +} + +static void AppendScalar(char **b, struct Type *ty, char *name, int i) { + if (ty->is_atomic) appendw(b, READ64LE("_Atomic ")); + if (i && ty->is_const) appendw(b, READ64LE("const \0")); + if (ty->is_unsigned) appends(b, "unsigned "); + appends(b, name); +} + +static void AppendType(char **b, struct Type *ty, int i) { + switch (ty->kind) { + case TY_VOID: + appends(b, "void"); + break; + case TY_BOOL: + appends(b, "_Bool"); + break; + case TY_CHAR: + AppendScalar(b, ty, "char", i); + break; + case TY_SHORT: + AppendScalar(b, ty, "short", i); + break; + case TY_INT: + case TY_ENUM: + AppendScalar(b, ty, "int", i); + break; + case TY_LONG: + AppendScalar(b, ty, "long", i); + break; + case TY_INT128: + AppendScalar(b, ty, "__int128", i); + break; + case TY_FLOAT: + AppendScalar(b, ty, "float", i); + break; + case TY_DOUBLE: + AppendScalar(b, ty, "double", i); + break; + case TY_LDOUBLE: + AppendScalar(b, ty, "long double", i); 
+ break; + case TY_FUNC: + AppendType(b, ty->return_ty, i); + appends(b, " (*)()"); + break; + case TY_PTR: + if (!ty->array_len) { + AppendType(b, ty->base, i + 1); + if (ty->base->kind != TY_FUNC) { + appendw(b, '*'); + if (i && ty->is_const) appendw(b, READ64LE(" const\0")); + if (ty->is_restrict) appends(b, " restrict"); + } + break; + } + /* undecay */ + case TY_ARRAY: + AppendType(b, ty->base, i + 1); + appendw(b, '['); + if (!i && ty->is_static) appendw(b, READ64LE("static ")); + if (!i && ty->is_restrict) appends(b, "restrict "); + appendf(b, "%lu", ty->array_len); + appendw(b, ']'); + break; + default: + assert(0); + } +} + +static bool IsSupportedReturnType(struct Type *ty) { + switch (ty->kind) { + case TY_VOID: + case TY_BOOL: + case TY_CHAR: + case TY_SHORT: + case TY_INT: + case TY_LONG: + case TY_FLOAT: + case TY_DOUBLE: + return true; + case TY_PTR: + if (ty->base->kind == TY_CHAR) { + return !ty->base->is_unsigned; + } else { + return false; + } + default: + return false; + } +} + +static bool IsSupportedParameterType(struct Type *ty) { + switch (ty->kind) { + case TY_BOOL: + case TY_CHAR: + case TY_SHORT: + case TY_INT: + case TY_LONG: + case TY_FLOAT: + case TY_DOUBLE: + return true; + case TY_PTR: + if (ty->base->kind == TY_CHAR) { + return true; + } else { + return false; + } + default: + return false; + } +} + +static bool Reject(char **b, struct Obj *obj, struct Type *ty, + const char *reason) { + appendf(b, "\n/* %s: %s: ", obj->name, reason); + AppendType(b, ty, 0); + appendw(b, READ32LE(" */\n")); + return false; +} + +static bool IsFunctionSupported(char **b, struct Obj *func) { + Obj *param; + if (!IsSupportedReturnType(func->ty->return_ty)) { + return Reject(b, func, func->ty->return_ty, "unsupported return type"); + } + for (param = func->params; param; param = param->next) { + if (!IsSupportedParameterType(param->ty)) { + return Reject(b, func, param->ty, "unsupported parameter type"); + } + } + return true; +} + +static int 
GetParamDirective(struct Obj **param) { + bool is_unsigned; + is_unsigned = (*param)->ty->is_unsigned; + switch ((*param)->ty->kind) { + case TY_BOOL: + return 'p'; + case TY_CHAR: + return is_unsigned ? 'B' : 'b'; + case TY_SHORT: + return is_unsigned ? 'H' : 'h'; + case TY_INT: + return is_unsigned ? 'I' : 'i'; + case TY_LONG: + return is_unsigned ? 'L' : 'l'; + case TY_FLOAT: + return 'f'; + case TY_DOUBLE: + return 'd'; + case TY_PTR: + if ((*param)->ty->base->kind == TY_CHAR) { + if ((*param)->ty->base->is_unsigned && + ((*param)->next && ((*param)->next->ty->kind == TY_LONG && + (*param)->next->ty->is_unsigned))) { + *param = (*param)->next; + return READ16LE("y*"); + } else { + return READ16LE("s*"); + } + } else { + UNREACHABLE(); + } + default: + UNREACHABLE(); + } +} + +static char *GetParamIntermediate(struct Obj **param) { + bool is_unsigned; + is_unsigned = (*param)->ty->is_unsigned; + switch ((*param)->ty->kind) { + case TY_BOOL: + return "int"; + case TY_CHAR: + return is_unsigned ? "unsigned char" : "signed char"; + case TY_SHORT: + return is_unsigned ? "unsigned short" : "short"; + case TY_INT: + return is_unsigned ? "unsigned" : "int"; + case TY_LONG: + return is_unsigned ? "unsigned long" : "long"; + case TY_FLOAT: + return "float"; + case TY_DOUBLE: + return "float"; + case TY_PTR: + if ((*param)->ty->base->kind == TY_CHAR) { + *param = (*param)->next; + return "Py_buffer"; + } else { + UNREACHABLE(); + } + default: + UNREACHABLE(); + } +} + +static const char *GetReturnIntermediate(struct Type *ty) { + bool is_unsigned; + is_unsigned = ty->is_unsigned; + switch (ty->kind) { + case TY_BOOL: + return "int"; + case TY_CHAR: + return is_unsigned ? "unsigned char" : "signed char"; + case TY_SHORT: + return is_unsigned ? "unsigned short" : "short"; + case TY_INT: + return is_unsigned ? "unsigned" : "int"; + case TY_LONG: + return is_unsigned ? 
"unsigned long" : "long"; + case TY_FLOAT: + return "float"; + case TY_DOUBLE: + return "float"; + case TY_PTR: + if (ty->base->kind == TY_CHAR) { + if (ty->base->is_const) { + return "const char*"; + } else { + return "char*"; + } + } else { + UNREACHABLE(); + } + default: + UNREACHABLE(); + } +} + +static void AppendFunction(char **b, Obj *func, const char *module) { + Obj *param; + const char *name; + appendf(b, "\nPyDoc_STRVAR(pb_%s_%s_doc,\n\"%s($module", module, func->name, + func->name); + for (param = func->params; param; param = param->next) { + appendw(b, READ16LE(", ")); + appends(b, param->name); + } + appends(b, ")"); + if (func->javadown) { + appends(b, "\\n\\\n--\\n\\n\\\n"); + AppendJavadown(b, func->javadown->javadown); + } + appendw(b, READ32LE("\");\n\n")); + AppendType(b, func->ty->return_ty, 0); + appendw(b, ' '); + appends(b, func->name); + appendw(b, '('); + if (func->params) { + AppendType(b, func->params->ty, 0); + for (param = func->params->next; param; param = param->next) { + appendw(b, READ16LE(", ")); + AppendType(b, param->ty, 0); + } + } else { + appendw(b, READ32LE("void")); + } + appendw(b, READ32LE(");\n\n")); + appends(b, "static PyObject*\n"); + appendf(b, "pb_%s_%s(PyObject* self_, PyObject* args_)\n", module, + func->name); + appendw(b, READ16LE("{\n")); + appendw(b, READ32LE(" ")); + appends(b, "PyObject* res_;\n"); + if (func->ty->return_ty->kind != TY_VOID) { + appendw(b, READ32LE(" ")); + appends(b, GetReturnIntermediate(func->ty->return_ty)); + appendw(b, READ64LE(" ret_;\n")); + } + if (func->params) { + for (param = func->params; param; param = param->next) { + name = param->name; + appendw(b, READ32LE(" ")); + appends(b, GetParamIntermediate(¶m)); + appendw(b, ' '); + appends(b, name); + appendw(b, READ16LE(";\n")); + if (!param) break; + } + appends(b, " if (!PyArg_ParseTuple(args_, \""); + for (param = func->params; param; param = param->next) { + appendw(b, GetParamDirective(¶m)); + if (!param) break; + } + 
appendf(b, ":%s\"", func->name); + for (param = func->params; param; param = param->next) { + appendf(b, ", &%s", param->name); + } + appends(b, ")) return 0;\n"); + } + appendw(b, READ32LE(" ")); + if (func->ty->return_ty->kind != TY_VOID) { + appendw(b, READ64LE("ret_ = ")); + } + appends(b, func->name); + appendw(b, '('); + for (param = func->params; param; param = param->next) { + if (param != func->params) { + appendw(b, READ16LE(", ")); + } + appends(b, param->name); + if (param->ty->kind == TY_PTR && param->ty->base->kind == TY_CHAR) { + appendw(b, READ32LE(".buf")); + if (param->ty->base->is_unsigned && + (param->next && (param->next->ty->kind == TY_LONG && + param->next->ty->is_unsigned))) { + appendf(b, ", %s.len", param->name); + param = param->next; + } + } + } + appends(b, ");\n"); + switch (func->ty->return_ty->kind) { + case TY_VOID: + appends(b, " res_ = Py_None;\n"); + appends(b, " Py_INCREF(res_);\n"); + break; + case TY_BOOL: + appends(b, " res_ = ret_ ? Py_True : Py_False;\n"); + appends(b, " Py_INCREF(res_);\n"); + break; + case TY_CHAR: + case TY_SHORT: + case TY_INT: + appends(b, " res_ = PyLong_FromLong(ret_);\n"); + break; + case TY_LONG: + if (func->ty->return_ty->is_unsigned) { + appends(b, " res_ = PyLong_FromUnsignedLong(ret_);\n"); + } else { + appends(b, " res_ = PyLong_FromLong(ret_);\n"); + } + break; + case TY_FLOAT: + case TY_DOUBLE: + appends(b, " res_ = PyFloat_FromDouble(ret_);\n"); + break; + case TY_PTR: + appends(b, "\ + if (ret_) {\n\ + res_ = PyUnicode_DecodeUTF8(ret_, strlen(ret_), 0);\n\ + } else {\n\ + res_ = Py_None;\n\ + Py_INCREF(res_);\n\ + }\n"); + if (!func->ty->return_ty->base->is_const) { + appends(b, " free(res_);\n"); + } + break; + default: + assert(0); + } + for (param = func->params; param; param = param->next) { + if (param->ty->kind == TY_PTR && param->ty->base->kind == TY_CHAR) { + appendf(b, " PyBuffer_Release(&%s);\n", param->name); + } + } + appends(b, " return res_;\n"); + appendw(b, 
READ16LE("}\n")); +} + +void output_bindings_python(const char *path, Obj *prog, Token *tok) { + int fd; + Obj *obj; + char *b = 0; + char *bm = 0; + const char *module; + module = basename(stripexts(strdup(tok->file->name))); + appends(&b, "\ +#define PY_SSIZE_T_CLEAN\n\ +#include \"third_party/python/Include/abstract.h\"\n\ +#include \"third_party/python/Include/boolobject.h\"\n\ +#include \"third_party/python/Include/floatobject.h\"\n\ +#include \"third_party/python/Include/import.h\"\n\ +#include \"third_party/python/Include/longobject.h\"\n\ +#include \"third_party/python/Include/methodobject.h\"\n\ +#include \"third_party/python/Include/modsupport.h\"\n\ +#include \"third_party/python/Include/moduleobject.h\"\n\ +#include \"third_party/python/Include/pymacro.h\"\n\ +#include \"third_party/python/Include/pyport.h\"\n\ +"); + if (tok->file->javadown) { + appendf(&b, "\nPyDoc_STRVAR(pb_%s_doc,\n\"", module); + AppendJavadown(&b, tok->file->javadown); + appendw(&b, READ32LE("\");\n")); + } + for (obj = prog; obj; obj = obj->next) { + if (obj->is_function) { + if (obj->is_static) continue; + if (!obj->is_definition) continue; + if (*obj->name == '_') continue; + if (strchr(obj->name, '$')) continue; + if (!IsFunctionSupported(&b, obj)) continue; + AppendFunction(&b, obj, module); + appendf(&bm, " {\"%s\", pb_%s_%s, %s, pb_%s_%s_doc},\n", obj->name, + module, obj->name, obj->params ? 
"METH_VARARGS" : "METH_NOARGS", + module, obj->name); + } + } + appends(&bm, " {0},\n"); + appendf(&b, "\nstatic PyMethodDef pb_%s_methods[] = {\n", module); + appendd(&b, bm, appendz(bm).i); + appends(&b, "};\n"); + appendf(&b, "\n\ +static struct PyModuleDef pb_%s_module = {\n\ + PyModuleDef_HEAD_INIT,\n\ + \"%s\",\n\ + %s,\n\ + -1,\n\ + pb_%s_methods,\n\ +};\n\ +\n\ +PyMODINIT_FUNC\n\ +PyInit_%s(void)\n\ +{\n\ + return PyModule_Create(&pb_%s_module);\n\ +}\n\ +\n\ +__attribute__((__section__(\".rodata.pytab.1\")))\n\ +const struct _inittab _PyImport_Inittab_%s = {\n\ + \"%s\",\n\ + PyInit_%s,\n\ +};\n\ +", + module, module, + tok->file->javadown ? gc(xasprintf("pb_%s_doc", module)) : "0", + module, module, module, module, module, module); + CHECK_NE(-1, (fd = creat(path, 0644))); + CHECK_NE(-1, xwrite(fd, b, appendz(b).i)); + CHECK_NE(-1, close(fd)); + free(bm); + free(b); +} diff --git a/third_party/chibicc/strarray.c b/third_party/chibicc/strarray.c index 501cd9a5f..0ee1028c0 100644 --- a/third_party/chibicc/strarray.c +++ b/third_party/chibicc/strarray.c @@ -1,14 +1,17 @@ #include "third_party/chibicc/chibicc.h" void strarray_push(StringArray *arr, char *s) { + size_t i; if (!arr->data) { arr->data = calloc(8, sizeof(char *)); arr->capacity = 8; } - if (arr->capacity == arr->len) { - arr->data = realloc(arr->data, sizeof(char *) * arr->capacity * 2); - arr->capacity *= 2; - for (int i = arr->len; i < arr->capacity; i++) arr->data[i] = NULL; + if (arr->len + 1 == arr->capacity) { + arr->capacity += arr->capacity >> 1; + arr->data = realloc(arr->data, arr->capacity * sizeof(*arr->data)); + for (i = arr->len; i < arr->capacity; i++) { + arr->data[i] = 0; + } } arr->data[arr->len++] = s; } diff --git a/third_party/chibicc/tokenize.c b/third_party/chibicc/tokenize.c index 5f12b6367..9da784d20 100644 --- a/third_party/chibicc/tokenize.c +++ b/third_party/chibicc/tokenize.c @@ -1,7 +1,10 @@ +#include "libc/log/log.h" +#include "libc/nexgen32e/bsf.h" +#include 
"libc/runtime/runtime.h" +#include "libc/str/str.h" #include "third_party/chibicc/chibicc.h" #include "third_party/chibicc/file.h" - -#define LOOKINGAT(TOK, OP) (!memcmp(TOK, OP, sizeof(OP) - 1)) +#include "third_party/chibicc/kw.h" // Input file static File *current_file; @@ -78,11 +81,6 @@ void warn_tok(Token *tok, char *fmt, ...) { va_end(ap); } -static int is_space(int c) { - return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f' || - c == '\v'; -} - // Consumes the current token if it matches `op`. bool equal(Token *tok, char *op, size_t n) { return n == tok->len && !memcmp(tok->loc, op, tok->len); @@ -122,17 +120,27 @@ static Token *new_token(TokenKind kind, char *start, char *end) { // Read an identifier and returns the length of it. // If p does not point to a valid identifier, 0 is returned. -static int read_ident(char *start) { +noinline int read_ident(char *start) { + uint32_t c; char *p = start; - uint32_t c = decode_utf8(&p, p); - if (!is_ident1(c)) return 0; + if (('a' <= *p && *p <= 'z') || ('A' <= *p && *p <= 'Z') || *p == '_') { + ++p; + } else { + c = decode_utf8(&p, p); + if (!is_ident1(c)) return 0; + } for (;;) { - char *q; - c = decode_utf8(&q, p); - if (!is_ident2(c)) { - return p - start; + if (('a' <= *p && *p <= 'z') || ('A' <= *p && *p <= 'Z') || + ('0' <= *p && *p <= '9') || *p == '_') { + ++p; + } else { + char *q; + c = decode_utf8(&q, p); + if (!is_ident2(c)) { + return p - start; + } + p = q; } - p = q; } } @@ -142,46 +150,31 @@ int read_punct(char *p) { "<<=", ">>=", "...", "==", "!=", "<=", ">=", "->", "+=", "-=", "*=", "/=", "++", "--", "%=", "&=", "|=", "^=", "&&", "||", "<<", ">>", "##", }; - for (int i = 0; i < sizeof(kPunct) / sizeof(*kPunct); i++) { - for (int j = 0;;) { - if (p[j] != kPunct[i][j]) break; - if (!kPunct[i][++j]) return j; + if (ispunct(*p)) { + for (int i = 0; i < sizeof(kPunct) / sizeof(*kPunct); i++) { + for (int j = 0;;) { + if (p[j] != kPunct[i][j]) break; + if (!kPunct[i][++j]) return j; + } 
} + return 1; + } else { + return 0; } - return ispunct(*p) ? 1 : 0; } static bool is_keyword(Token *tok) { - static HashMap map; - if (map.capacity == 0) { - static char *kw[] = { - "return", "if", "else", - "for", "while", "int", - "sizeof", "char", "struct", - "union", "short", "long", - "void", "typedef", "_Bool", - "enum", "static", "goto", - "break", "continue", "switch", - "case", "default", "extern", - "_Alignof", "_Alignas", "do", - "signed", "unsigned", "const", - "volatile", "auto", "register", - "restrict", "__restrict", "__restrict__", - "_Noreturn", "float", "double", - "typeof", "asm", "_Thread_local", - "__thread", "_Atomic", "__attribute__", - }; - for (int i = 0; i < sizeof(kw) / sizeof(*kw); i++) { - hashmap_put(&map, kw[i], (void *)1); - } - } - return hashmap_get2(&map, tok->loc, tok->len); + unsigned char kw; + kw = GetKw(tok->loc, tok->len); + return kw && !(kw & -64); } int read_escaped_char(char **new_pos, char *p) { + int x; + unsigned c; if ('0' <= *p && *p <= '7') { // Read an octal number. - unsigned c = *p++ - '0'; + c = *p++ - '0'; if ('0' <= *p && *p <= '7') { c = (c << 3) + (*p++ - '0'); if ('0' <= *p && *p <= '7') c = (c << 3) + (*p++ - '0'); @@ -191,14 +184,15 @@ int read_escaped_char(char **new_pos, char *p) { } if (*p == 'x') { // Read a hexadecimal number. - p++; - if (!isxdigit(*p)) error_at(p, "invalid hex escape sequence"); - unsigned c = 0; - for (; isxdigit(*p); p++) { - c = (c << 4) + hextoint(*p); /* TODO(jart): overflow here unicode_test */ + if ((++p, x = kHexToInt[*p++ & 255]) != -1) { + for (c = x; (x = kHexToInt[*p & 255]) != -1; p++) { + c = c << 4 | x; + } + *new_pos = p; + return c; + } else { + error_at(p, "invalid hex escape sequence"); } - *new_pos = p; - return c; } *new_pos = p + 1; // Escape sequences are defined using themselves here. E.g. 
@@ -330,14 +324,16 @@ static Token *read_char_literal(char *start, char *quote, Type *ty) { return tok; } -static bool convert_pp_int(Token *tok) { +bool convert_pp_int(Token *tok) { char *p = tok->loc; // Read a binary, octal, decimal or hexadecimal number. int base = 10; - if (!strncasecmp(p, "0x", 2) && isxdigit(p[2])) { + if ((p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) && + kHexToInt[p[2] & 255] != -1) { p += 2; base = 16; - } else if (!strncasecmp(p, "0b", 2) && (p[2] == '0' || p[2] == '1')) { + } else if ((p[0] == '0' && (p[1] == 'b' || p[1] == 'B')) && + (p[2] == '0' || p[2] == '1')) { p += 2; base = 2; } else if (*p == '0') { @@ -347,23 +343,27 @@ static bool convert_pp_int(Token *tok) { // Read U, L or LL suffixes. bool l = false; bool u = false; - if (LOOKINGAT(p, "LLU") || LOOKINGAT(p, "LLu") || LOOKINGAT(p, "llU") || - LOOKINGAT(p, "llu") || LOOKINGAT(p, "ULL") || LOOKINGAT(p, "Ull") || - LOOKINGAT(p, "uLL") || LOOKINGAT(p, "ull")) { - p += 3; - l = u = true; - } else if (!strncasecmp(p, "lu", 2) || !strncasecmp(p, "ul", 2)) { - p += 2; - l = u = true; - } else if (LOOKINGAT(p, "LL") || LOOKINGAT(p, "ll")) { - p += 2; - l = true; - } else if (*p == 'L' || *p == 'l') { - p++; - l = true; - } else if (*p == 'U' || *p == 'u') { - p++; - u = true; + int a, b, c; + if ((a = kToLower[p[0] & 255]) && (a == 'l' || a == 'u')) { + b = kToLower[p[1] & 255]; + c = b ? kToLower[p[2] & 255] : 0; + if ((a == 'l' && b == 'l' && c == 'u') || + (a == 'u' && b == 'l' && c == 'l')) { + p += 3; + l = u = true; + } else if ((a == 'l' && b == 'u') || (a == 'u' && b == 'l')) { + p += 2; + l = u = true; + } else if (a == 'l' && b == 'l') { + p += 2; + l = true; + } else if (a == 'l') { + p += 1; + l = true; + } else if (a == 'u') { + p += 1; + u = true; + } } if (p != tok->loc + tok->len) return false; // Infer a type. @@ -462,128 +462,157 @@ Token *tokenize_string_literal(Token *tok, Type *basety) { // Tokenize a given string and returns new tokens. 
Token *tokenize(File *file) { - current_file = file; - char *p = file->contents; Token head = {}; Token *cur = &head; + char *q, *p = file->contents; struct Javadown *javadown; + current_file = file; at_bol = true; has_space = false; - while (*p) { - // Skip line comments. - if (LOOKINGAT(p, "//")) { - p += 2; - while (*p != '\n') p++; - has_space = true; - continue; - } - // Javadoc-style markdown comments. - if (LOOKINGAT(p, "/**") && p[3] != '/' && p[3] != '*') { - char *q = strstr(p + 3, "*/"); - if (!q) error_at(p, "unclosed javadown"); - javadown = ParseJavadown(p + 3, q - p - 3 - 2); - if (javadown->isfileoverview) { - FreeJavadown(file->javadown); - file->javadown = javadown; - } else { - cur = cur->next = new_token(TK_JAVADOWN, p, q + 2); - cur->javadown = javadown; - } - p = q + 2; - has_space = true; - continue; - } - // Skip block comments. - if (LOOKINGAT(p, "/*")) { - char *q = strstr(p + 2, "*/"); - if (!q) error_at(p, "unclosed block comment"); - p = q + 2; - has_space = true; - continue; - } - // Skip newline. - if (*p == '\n') { - p++; - at_bol = true; - has_space = false; - continue; - } - // Skip whitespace characters. - if (is_space(*p)) { - p++; - has_space = true; - continue; - } - // Numeric literal - if (isdigit(*p) || (*p == '.' && isdigit(p[1]))) { - char *q = p++; - for (;;) { - if (p[0] && p[1] && strchr("eEpP", p[0]) && strchr("+-", p[1])) { + for (;;) { + switch (*p) { + case 0: + cur = cur->next = new_token(TK_EOF, p, p); + add_line_numbers(head.next); + return head.next; + case '/': + // Skip line comments. 
+ if (p[1] == '/') { p += 2; - } else if (isalnum(*p) || *p == '.') { - p++; - } else { - break; + while (*p != '\n') p++; + has_space = true; + continue; } - } - cur = cur->next = new_token(TK_PP_NUM, q, p); - continue; - } - // String literal - if (*p == '"') { - cur = cur->next = read_string_literal(p, p); - p += cur->len; - continue; - } - // UTF-8 string literal - if (LOOKINGAT(p, "u8\"")) { - cur = cur->next = read_string_literal(p, p + 2); - p += cur->len; - continue; - } - // UTF-16 string literal - if (LOOKINGAT(p, "u\"")) { - cur = cur->next = read_utf16_string_literal(p, p + 1); - p += cur->len; - continue; - } - // Wide string literal - if (LOOKINGAT(p, "L\"")) { - cur = cur->next = read_utf32_string_literal(p, p + 1, ty_int); - p += cur->len; - continue; - } - // UTF-32 string literal - if (LOOKINGAT(p, "U\"")) { - cur = cur->next = read_utf32_string_literal(p, p + 1, ty_uint); - p += cur->len; - continue; - } - // Character literal - if (*p == '\'') { - cur = cur->next = read_char_literal(p, p, ty_int); - cur->val = (char)cur->val; - p += cur->len; - continue; - } - // UTF-16 character literal - if (LOOKINGAT(p, "u'")) { - cur = cur->next = read_char_literal(p, p + 1, ty_ushort); - cur->val &= 0xffff; - p += cur->len; - continue; - } - // Wide character literal - if (LOOKINGAT(p, "L'")) { - cur = cur->next = read_char_literal(p, p + 1, ty_int); - p += cur->len; - continue; - } - // UTF-32 character literal - if (LOOKINGAT(p, "U'")) { - cur = cur->next = read_char_literal(p, p + 1, ty_uint); - p += cur->len; - continue; + // Javadoc-style markdown comments. 
+ if (p[1] == '*' && p[2] == '*' && p[3] != '/' && p[3] != '*') { + q = strstr(p + 3, "*/"); + if (!q) error_at(p, "unclosed javadown"); + javadown = ParseJavadown(p + 3, q - p - 3 - 2); + if (javadown->isfileoverview) { + FreeJavadown(file->javadown); + file->javadown = javadown; + } else { + cur = cur->next = new_token(TK_JAVADOWN, p, q + 2); + cur->javadown = javadown; + } + p = q + 2; + has_space = true; + continue; + } + // Skip block comments. + if (p[1] == '*') { + q = strstr(p + 2, "*/"); + if (!q) error_at(p, "unclosed block comment"); + p = q + 2; + has_space = true; + continue; + } + break; + case '\n': + // Skip newline. + p++; + at_bol = true; + has_space = false; + continue; + case ' ': + case '\t': + case '\r': + case '\f': + case '\v': + // Skip whitespace characters. + p++; + has_space = true; + continue; + case '.': + if (!isdigit(p[1])) break; + /* fallthrough */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + // Numeric literal + q = p++; + for (;;) { + if (p[0] && p[1] && + (p[0] == 'e' || p[0] == 'E' || p[0] == 'p' || p[0] == 'P') && + (p[1] == '+' || p[1] == '-')) { + p += 2; + } else if (('0' <= *p && *p <= '9') || ('A' <= *p && *p <= 'Z') || + ('a' <= *p && *p <= 'z') || *p == '.') { + p++; + } else { + break; + } + } + cur = cur->next = new_token(TK_PP_NUM, q, p); + continue; + case '"': + // String literal + cur = cur->next = read_string_literal(p, p); + p += cur->len; + continue; + case 'u': + // UTF-8 string literal + if (p[1] == '8' && p[2] == '"') { + cur = cur->next = read_string_literal(p, p + 2); + p += cur->len; + continue; + } + // UTF-16 string literal + if (p[1] == '"') { + cur = cur->next = read_utf16_string_literal(p, p + 1); + p += cur->len; + continue; + } + // UTF-16 character literal + if (p[1] == '\'') { + cur = cur->next = read_char_literal(p, p + 1, ty_ushort); + cur->val &= 0xffff; + p += cur->len; + continue; + } + break; + case 'L': + // Wide 
string literal + if (p[1] == '"') { + cur = cur->next = read_utf32_string_literal(p, p + 1, ty_int); + p += cur->len; + continue; + } + // Wide character literal + if (p[1] == '\'') { + cur = cur->next = read_char_literal(p, p + 1, ty_int); + p += cur->len; + continue; + } + break; + case '\'': + // Character literal + cur = cur->next = read_char_literal(p, p, ty_int); + cur->val = (char)cur->val; + p += cur->len; + continue; + case 'U': + // UTF-32 string literal + if (p[1] == '"') { + cur = cur->next = read_utf32_string_literal(p, p + 1, ty_uint); + p += cur->len; + continue; + } + // UTF-32 character literal + if (p[1] == '\'') { + cur = cur->next = read_char_literal(p, p + 1, ty_uint); + p += cur->len; + continue; + } + default: + break; } // Identifier or keyword int ident_len = read_ident(p); @@ -601,9 +630,6 @@ Token *tokenize(File *file) { } error_at(p, "invalid token"); } - cur = cur->next = new_token(TK_EOF, p, p); - add_line_numbers(head.next); - return head.next; } File **get_input_files(void) { @@ -630,29 +656,56 @@ static uint32_t read_universal_char(char *p, int len) { // Replace \u or \U escape sequences with corresponding UTF-8 bytes. 
static void convert_universal_chars(char *p) { + uint32_t c; char *q = p; - while (*p) { - if (LOOKINGAT(p, "\\u")) { - uint32_t c = read_universal_char(p + 2, 4); - if (c) { - p += 6; - q += encode_utf8(q, c); - } else { - *q++ = *p++; + for (;;) { +#if defined(__GNUC__) && defined(__x86_64__) && !defined(__chibicc__) + typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1))); + typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16))); + if (!((uintptr_t)p & 15)) { + xmm_t v; + unsigned m; + xmm_t z = {0}; + xmm_t s = {'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', + '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'}; + for (;;) { + v = *(const xmm_t *)p; + m = __builtin_ia32_pmovmskb128((v == z) | (v == s)); + if (!m) { + *(xmm_u *)q = v; + p += 16; + q += 16; + } else { + m = bsf(m); + memmove(q, p, m); + p += m; + q += m; + break; + } } - } else if (LOOKINGAT(p, "\\U")) { - uint32_t c = read_universal_char(p + 2, 8); - if (c) { - p += 10; - q += encode_utf8(q, c); - } else { - *q++ = *p++; + } +#endif + if (p[0]) { + if (p[0] == '\\') { + if (p[1] == 'u') { + if ((c = read_universal_char(p + 2, 4))) { + p += 6; + q += encode_utf8(q, c); + continue; + } + } else if (p[1] == 'U') { + if ((c = read_universal_char(p + 2, 8))) { + p += 10; + q += encode_utf8(q, c); + continue; + } + } else if (p[0] == '\\') { + *q++ = *p++; + } } - } else if (p[0] == '\\') { - *q++ = *p++; *q++ = *p++; } else { - *q++ = *p++; + break; } } *q = '\0'; @@ -673,5 +726,6 @@ Token *tokenize_file(char *path) { input_files[file_no] = file; input_files[file_no + 1] = NULL; file_no++; + /* ftrace_install(); */ return tokenize(file); } diff --git a/third_party/chibicc/unicode.c b/third_party/chibicc/unicode.c index 385a03927..9a0f143a0 100644 --- a/third_party/chibicc/unicode.c +++ b/third_party/chibicc/unicode.c @@ -60,7 +60,7 @@ uint32_t decode_utf8(char **new_pos, char *p) { return c; } -static bool in_range(uint32_t *range, uint32_t c) { +static bool 
in_range(const uint32_t *range, uint32_t c) { for (int i = 0; range[i] != -1; i += 2) { if (range[i] <= c && c <= range[i + 1]) { return true; @@ -80,7 +80,7 @@ static bool in_range(uint32_t *range, uint32_t c) { // 0x00BE-0x00C0 are allowed, while neither ⟘ (U+27D8) nor ' ' // (U+3000, full-width space) are allowed because they are out of range. bool is_ident1(uint32_t c) { - static uint32_t range[] = { + static const uint32_t range[] = { 0x00A8, 0x00A8, 0x00AA, 0x00AA, 0x00AD, 0x00AD, 0x00AF, 0x00AF, 0x00B2, 0x00B5, 0x00B7, 0x00BA, 0x00BC, 0x00BE, 0x00C0, 0x00D6, 0x00D8, 0x00F6, 0x00F8, 0x00FF, 0x0100, 0x02FF, 0x0370, 0x167F, @@ -106,7 +106,7 @@ bool is_ident1(uint32_t c) { // Returns true if a given character is acceptable as a non-first // character of an identifier. bool is_ident2(uint32_t c) { - static uint32_t range[] = { + static const uint32_t range[] = { 0x0300, 0x036F, 0x1DC0, 0x1DFF, 0x20D0, 0x20FF, 0xFE20, 0xFE2F, -1, }; if (is_ident1(c)) return true; diff --git a/third_party/dlmalloc/README.cosmo b/third_party/dlmalloc/README.cosmo index 8a6be68be..95e599734 100644 --- a/third_party/dlmalloc/README.cosmo +++ b/third_party/dlmalloc/README.cosmo @@ -1,15 +1,21 @@ -Numerous local changes were made while vendoring Doug Lee's original -dlmalloc sources. Those changes basically boil down to: +ORIGIN - 1. Fewer #ifdefs - 2. More modules (so linker can do a better job) - 3. Delete code we don't need (cf. Knight Capital) - 4. Readability / stylistic consistency + http://gee.cs.oswego.edu/ -Since we haven't made any genuine improvements to Doug Lee's legendary -allocator, we feel this folder faithfully presents his intended work, in -harmony with Cosmopolitan conventions. +LOCAL CHANGES -The only deleted code we're sure has compelling merit is the mspace -functionality. If we ever need memory pools, they might be more -appropriately vendored under //third_party/dlmalloc_mspace. 
+ Numerous local changes were made while vendoring Doug Lee's original + dlmalloc sources. Those changes basically boil down to: + + 1. Fewer #ifdefs + 2. More modules (so linker can do a better job) + 3. Delete code we don't need (cf. Knight Capital) + 4. Readability / stylistic consistency + + Since we haven't made any genuine improvements to Doug Lee's legendary + allocator, we feel this folder faithfully presents his intended work, in + harmony with Cosmopolitan conventions. + + The only deleted code we're sure has compelling merit is the mspace + functionality. If we ever need memory pools, they might be more + appropriately vendored under //third_party/dlmalloc_mspace. diff --git a/third_party/dlmalloc/bulk_free.c b/third_party/dlmalloc/bulk_free.c index 8e78c3cf4..77d4c285c 100644 --- a/third_party/dlmalloc/bulk_free.c +++ b/third_party/dlmalloc/bulk_free.c @@ -3,13 +3,13 @@ /** * Frees and clears (sets to NULL) each non-null pointer in the given - * array. This is likely to be faster than freeing them one-by-one. If - * footers are used, pointers that have been allocated in different - * mspaces are not freed or cleared, and the count of all such pointers - * is returned. For large arrays of pointers with poor locality, it may - * be worthwhile to sort this array before calling bulk_free. + * array. This is twice as fast as freeing them one-by-one. If footers + * are used, pointers that have been allocated in different mspaces are + * not freed or cleared, and the count of all such pointers is returned. + * For large arrays of pointers with poor locality, it may be worthwhile + * to sort this array before calling bulk_free. */ -size_t bulk_free(void *array[], size_t nelem) { +size_t dlbulk_free(void *array[], size_t nelem) { /* * Try to free all pointers in the given array. 
Note: this could be * made faster, by delaying consolidation, at the price of disabling diff --git a/third_party/dlmalloc/dlcalloc.c b/third_party/dlmalloc/dlcalloc.c index 43343a8d1..9e132ce71 100644 --- a/third_party/dlmalloc/dlcalloc.c +++ b/third_party/dlmalloc/dlcalloc.c @@ -6,6 +6,8 @@ void *dlcalloc(size_t n_elements, size_t elem_size) { size_t req; if (__builtin_mul_overflow(n_elements, elem_size, &req)) req = -1; mem = dlmalloc(req); - if (mem != 0 && calloc_must_clear(mem2chunk(mem))) memset(mem, 0, req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) { + bzero(mem, req); + } return mem; } diff --git a/third_party/dlmalloc/dlindependent_calloc.c b/third_party/dlmalloc/dlindependent_calloc.c index 6687ceca5..499f63d1b 100644 --- a/third_party/dlmalloc/dlindependent_calloc.c +++ b/third_party/dlmalloc/dlindependent_calloc.c @@ -66,7 +66,7 @@ static void **ialloc(mstate m, size_t n_elements, size_t *sizes, int opts, assert(!is_mmapped(p)); if (opts & 0x2) { /* optionally clear the elements */ - memset((size_t *)mem, 0, remainder_size - SIZE_T_SIZE - array_size); + bzero((size_t *)mem, remainder_size - SIZE_T_SIZE - array_size); } /* If not provided, allocate the pointer array as final part of chunk */ diff --git a/third_party/dlmalloc/dlmalloc.c b/third_party/dlmalloc/dlmalloc.c index 0c8f758be..d08e1fca8 100644 --- a/third_party/dlmalloc/dlmalloc.c +++ b/third_party/dlmalloc/dlmalloc.c @@ -881,17 +881,13 @@ textstartup void dlmalloc_init(void) { void *dlmemalign$impl(mstate m, size_t alignment, size_t bytes) { void *mem = 0; - if (alignment < MIN_CHUNK_SIZE) { /* must be at least a minimum chunk size */ - alignment = MIN_CHUNK_SIZE; /* is 32 bytes on NexGen32e */ - } - if ((alignment & (alignment - SIZE_T_ONE)) != 0) { /* Ensure a power of 2 */ - alignment = roundup2pow(alignment); - } if (bytes >= MAX_REQUEST - alignment) { if (m != 0) { /* Test isn't needed but avoids compiler warning */ enomem(); } } else { + /* alignment is 32+ bytes rounded up to 
nearest two power */ + alignment = 2ul << bsrl(MAX(MIN_CHUNK_SIZE, alignment) - 1); size_t nb = request2size(bytes); size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; mem = dlmalloc_impl(req, false); diff --git a/third_party/dlmalloc/dlmalloc.internal.h b/third_party/dlmalloc/dlmalloc.internal.h index 916bf481d..a2afa9c1a 100644 --- a/third_party/dlmalloc/dlmalloc.internal.h +++ b/third_party/dlmalloc/dlmalloc.internal.h @@ -7,6 +7,7 @@ #include "libc/calls/calls.h" #include "libc/dce.h" #include "libc/log/backtrace.internal.h" +#include "libc/nexgen32e/bsf.h" #include "libc/runtime/runtime.h" #include "libc/runtime/symbols.internal.h" #if !(__ASSEMBLER__ + __LINKER__ + 0) @@ -854,7 +855,7 @@ extern struct MallocParams g_mparams; #define compute_bit2idx(X, I) \ { \ unsigned int J; \ - J = __builtin_ctz(X); \ + J = bsf(X); \ I = (bindex_t)J; \ } @@ -1309,6 +1310,7 @@ struct MallocStats dlmalloc_stats(mstate) hidden; int dlmalloc_sys_trim(mstate, size_t) hidden; void dlmalloc_dispose_chunk(mstate, mchunkptr, size_t) hidden; mchunkptr dlmalloc_try_realloc_chunk(mstate, mchunkptr, size_t, int) hidden; +size_t dlbulk_free(void *[], size_t) hidden; COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/third_party/dlmalloc/dlmalloc_stats.c b/third_party/dlmalloc/dlmalloc_stats.c index adabaada8..ec3421902 100644 --- a/third_party/dlmalloc/dlmalloc_stats.c +++ b/third_party/dlmalloc/dlmalloc_stats.c @@ -22,7 +22,7 @@ */ struct MallocStats dlmalloc_stats(mstate m) { struct MallocStats res; - memset(&res, 0, sizeof(res)); + bzero(&res, sizeof(res)); ensure_initialization(); if (!PREACTION(m)) { check_malloc_state(m); diff --git a/third_party/linenoise/linenoise.c b/third_party/linenoise/linenoise.c index 012ea0be1..8c2ff1b67 100644 --- a/third_party/linenoise/linenoise.c +++ b/third_party/linenoise/linenoise.c @@ -14,6 +14,7 @@ │ - Fix flickering │ │ - Add UTF-8 editing │ │ - Add CTRL-R search │ +│ - Support unlimited lines │ │ - React 
to terminal resizing │ │ - Don't generate .data section │ │ - Support terminal flow control │ @@ -25,9 +26,9 @@ │ - Fix corruption issues by using generalized parsing │ │ - Implement nearly all GNU readline editing shortcuts │ │ - Remove heavyweight dependencies like printf/sprintf │ -│ - Remove ISIG→^C→EAGAIN hack and catch signals properly │ +│ - Remove ISIG→^C→EAGAIN hack and use ephemeral handlers │ │ - Support running on Windows in MinTTY or CMD.EXE on Win10+ │ -│ - Support diacratics, русский, Ελληνικά, 中国人, 한국인, 日本 │ +│ - Support diacratics, русский, Ελληνικά, 中国人, 日本語, 한국인 │ │ │ │ SHORTCUTS │ │ │ @@ -47,6 +48,8 @@ │ ALT-> END OF HISTORY │ │ ALT-F FORWARD WORD │ │ ALT-B BACKWARD WORD │ +│ CTRL-ALT-F FORWARD EXPR │ +│ CTRL-ALT-B BACKWARD EXPR │ │ CTRL-K KILL LINE FORWARDS │ │ CTRL-U KILL LINE BACKWARDS │ │ ALT-H KILL WORD BACKWARDS │ @@ -60,11 +63,14 @@ │ ALT-U UPPERCASE WORD │ │ ALT-L LOWERCASE WORD │ │ ALT-C CAPITALIZE WORD │ +│ CTRL-C INTERRUPT PROCESS │ +│ CTRL-Z SUSPEND PROCESS │ +│ CTRL-\ QUIT PROCESS │ +│ CTRL-S PAUSE OUTPUT │ +│ CTRL-Q UNPAUSE OUTPUT (IF PAUSED) │ +│ CTRL-Q ESCAPED INSERT │ │ CTRL-SPACE SET MARK │ │ CTRL-X CTRL-X GOTO MARK │ -│ CTRL-S PAUSE OUTPUT │ -│ CTRL-Q RESUME OUTPUT │ -│ CTRL-Z SUSPEND PROCESS │ │ │ │ EXAMPLE │ │ │ @@ -125,6 +131,7 @@ #include "libc/errno.h" #include "libc/fmt/conv.h" #include "libc/intrin/asan.internal.h" +#include "libc/log/libfatal.internal.h" #include "libc/log/log.h" #include "libc/macros.internal.h" #include "libc/mem/mem.h" @@ -143,6 +150,7 @@ #include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/sa.h" #include "libc/sysv/consts/sig.h" +#include "libc/sysv/consts/termios.h" #include "libc/unicode/unicode.h" #include "third_party/linenoise/linenoise.h" #include "tool/build/lib/case.h" @@ -200,9 +208,38 @@ struct linenoiseState { unsigned mark; /* saved cursor position */ unsigned yi, yj; /* boundaries of last yank */ char seq[2][16]; /* keystroke history for yanking code */ + char final; /* set to 
true on last update */ char dirty; /* if an update was squashed */ }; +static const unsigned short kMirrorLeft[][2] = { + {L'(', L')'}, {L'[', L']'}, {L'{', L'}'}, {L'⁅', L'⁆'}, + {L'⁽', L'⁾'}, {L'₍', L'₎'}, {L'⌈', L'⌉'}, {L'⌊', L'⌋'}, + {L'〈', L'〉'}, {L'❨', L'❩'}, {L'❪', L'❫'}, {L'❬', L'❭'}, + {L'❮', L'❯'}, {L'❰', L'❱'}, {L'❲', L'❳'}, {L'❴', L'❵'}, + {L'⟅', L'⟆'}, {L'⟦', L'⟧'}, {L'⟨', L'⟩'}, {L'⟪', L'⟫'}, + {L'⟬', L'⟭'}, {L'⟮', L'⟯'}, {L'⦃', L'⦄'}, {L'⦅', L'⦆'}, + {L'⦇', L'⦈'}, {L'⦉', L'⦊'}, {L'⦋', L'⦌'}, {L'⦍', L'⦐'}, + {L'⦏', L'⦎'}, {L'⦑', L'⦒'}, {L'⦓', L'⦔'}, {L'⦗', L'⦘'}, + {L'⧘', L'⧙'}, {L'⧚', L'⧛'}, {L'⧼', L'⧽'}, {L'﹙', L'﹚'}, + {L'﹛', L'﹜'}, {L'﹝', L'﹞'}, {L'(', L')'}, {L'[', L']'}, + {L'{', L'}'}, {L'「', L'」'}, +}; + +static const unsigned short kMirrorRight[][2] = { + {L')', L'('}, {L']', L'['}, {L'}', L'{'}, {L'⁆', L'⁅'}, + {L'⁾', L'⁽'}, {L'₎', L'₍'}, {L'⌉', L'⌈'}, {L'⌋', L'⌊'}, + {L'〉', L'〈'}, {L'❩', L'❨'}, {L'❫', L'❪'}, {L'❭', L'❬'}, + {L'❯', L'❮'}, {L'❱', L'❰'}, {L'❳', L'❲'}, {L'❵', L'❴'}, + {L'⟆', L'⟅'}, {L'⟧', L'⟦'}, {L'⟩', L'⟨'}, {L'⟫', L'⟪'}, + {L'⟭', L'⟬'}, {L'⟯', L'⟮'}, {L'⦄', L'⦃'}, {L'⦆', L'⦅'}, + {L'⦈', L'⦇'}, {L'⦊', L'⦉'}, {L'⦌', L'⦋'}, {L'⦎', L'⦏'}, + {L'⦐', L'⦍'}, {L'⦒', L'⦑'}, {L'⦔', L'⦓'}, {L'⦘', L'⦗'}, + {L'⧙', L'⧘'}, {L'⧛', L'⧚'}, {L'⧽', L'⧼'}, {L'﹚', L'﹙'}, + {L'﹜', L'﹛'}, {L'﹞', L'﹝'}, {L')', L'('}, {L']', L'['}, + {L'}', L'{'}, {L'」', L'「'}, +}; + static const char *const kUnsupported[] = {"dumb", "cons25", "emacs"}; static int gotint; @@ -210,6 +247,7 @@ static int gotcont; static int gotwinch; static char rawmode; static char maskmode; +static char ispaused; static char iscapital; static int historylen; static struct linenoiseRing ring; @@ -226,6 +264,35 @@ static linenoiseCompletionCallback *completionCallback; static void linenoiseAtExit(void); static void linenoiseRefreshLine(struct linenoiseState *); +static unsigned GetMirror(const unsigned short A[][2], size_t n, unsigned c) { + int l, m, r; + l = 0; + r = n - 1; + while (l 
<= r) { + m = (l + r) >> 1; + if (A[m][0] < c) { + l = m + 1; + } else if (A[m][0] > c) { + r = m - 1; + } else { + return A[m][1]; + } + } + return 0; +} + +static unsigned GetMirrorLeft(unsigned c) { + return GetMirror(kMirrorRight, ARRAYLEN(kMirrorRight), c); +} + +static unsigned GetMirrorRight(unsigned c) { + return GetMirror(kMirrorLeft, ARRAYLEN(kMirrorLeft), c); +} + +static int isxseparator(wint_t c) { + return iswseparator(c) && !GetMirrorLeft(c) && !GetMirrorRight(c); +} + static int notwseparator(wint_t c) { return !iswseparator(c); } @@ -586,13 +653,20 @@ static int linenoiseIsUnsupportedTerm(void) { return res; } +static void linenoiseUnpause(int fd) { + if (ispaused) { + tcflow(fd, TCOON); + ispaused = 0; + } +} + static int enableRawMode(int fd) { struct termios raw; struct sigaction sa; if (tcgetattr(fd, &orig_termios) != -1) { raw = orig_termios; - raw.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP); - raw.c_lflag &= ~(ECHO | ICANON | IEXTEN); + raw.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON); + raw.c_lflag &= ~(ECHO | ICANON | IEXTEN | ISIG); raw.c_oflag &= ~OPOST; raw.c_iflag |= IUTF8; raw.c_cflag |= CS8; @@ -617,6 +691,7 @@ static int enableRawMode(int fd) { void linenoiseDisableRawMode(void) { if (rawmode != -1) { + linenoiseUnpause(rawmode); sigaction(SIGCONT, &orig_cont, 0); sigaction(SIGWINCH, &orig_winch, 0); tcsetattr(rawmode, TCSAFLUSH, &orig_termios); @@ -633,6 +708,9 @@ static int linenoiseWrite(int fd, const void *p, size_t n) { errno = EINTR; return -1; } + if (ispaused) { + return 0; + } rc = write(fd, p, n); if (rc == -1 && errno == EINTR) { continue; @@ -982,11 +1060,88 @@ static char *linenoiseRefreshHints(struct linenoiseState *l) { return ab.b; } +static size_t Forward(struct linenoiseState *l, size_t pos) { + return pos + GetUtf8(l->buf + pos, l->len - pos).n; +} + +static size_t Backward(struct linenoiseState *l, size_t pos) { + if (pos) { + do --pos; + while (pos && (l->buf[pos] & 0300) == 0200); + } + return pos; +} 
+ +static int linenoiseMirrorLeft(struct linenoiseState *l, unsigned res[2]) { + unsigned c, pos, left, right, depth, index; + if ((pos = Backward(l, l->pos))) { + right = GetUtf8(l->buf + pos, l->len - pos).c; + if ((left = GetMirrorLeft(right))) { + depth = 0; + index = pos; + do { + pos = Backward(l, pos); + c = GetUtf8(l->buf + pos, l->len - pos).c; + if (c == right) { + ++depth; + } else if (c == left) { + if (depth) { + --depth; + } else { + res[0] = pos; + res[1] = index; + return 0; + } + } + } while (pos); + } + } + return -1; +} + +static int linenoiseMirrorRight(struct linenoiseState *l, unsigned res[2]) { + struct rune rune; + unsigned pos, left, right, depth, index; + pos = l->pos; + rune = GetUtf8(l->buf + pos, l->len - pos); + left = rune.c; + if ((right = GetMirrorRight(left))) { + depth = 0; + index = pos; + do { + pos += rune.n; + rune = GetUtf8(l->buf + pos, l->len - pos); + if (rune.c == left) { + ++depth; + } else if (rune.c == right) { + if (depth) { + --depth; + } else { + res[0] = index; + res[1] = pos; + return 0; + } + } + } while (pos + rune.n < l->len); + } + return -1; +} + +static int linenoiseMirror(struct linenoiseState *l, unsigned res[2]) { + int rc; + rc = linenoiseMirrorLeft(l, res); + if (rc == -1) rc = linenoiseMirrorRight(l, res); + return rc; +} + static void linenoiseRefreshLineImpl(struct linenoiseState *l, int force) { char *hint; + char flipit; + char hasflip; char haswides; struct abuf ab; struct rune rune; + unsigned flip[2]; const char *p, *buf; struct winsize oldsize; int i, x, y, t, xn, yn, cx, cy, tn, resized; @@ -995,6 +1150,13 @@ static void linenoiseRefreshLineImpl(struct linenoiseState *l, int force) { /* * synchonize the i/o state */ + if (ispaused) { + if (force) { + linenoiseUnpause(l->ofd); + } else { + return; + } + } if (!force && HasPendingInput(l->ifd)) { l->dirty = 1; return; @@ -1004,6 +1166,7 @@ static void linenoiseRefreshLineImpl(struct linenoiseState *l, int force) { gotwinch = 0; l->ws = 
GetTerminalSize(l->ws, l->ifd, l->ofd); } + hasflip = !l->final && !linenoiseMirror(l, flip); StartOver: fd = l->ofd; @@ -1090,13 +1253,16 @@ StartOver: if (maskmode) { abAppendw(&ab, '*'); } else { + flipit = hasflip && (i == flip[0] || i == flip[1]); + if (flipit) abAppendw(&ab, READ32LE("\033[1m")); abAppendw(&ab, tpenc(rune.c)); + if (flipit) abAppendw(&ab, READ64LE("\033[22m\0\0")); } t = wcwidth(rune.c); t = MAX(0, t); x += t; } - if ((hint = linenoiseRefreshHints(l))) { + if (!l->final && (hint = linenoiseRefreshHints(l))) { if (GetMonospaceWidth(hint, strlen(hint), 0) < xn - x) { if (cx < 0) { cx = x; @@ -1203,18 +1369,6 @@ static void linenoiseEditRefresh(struct linenoiseState *l) { linenoiseRefreshLine(l); } -static size_t Forward(struct linenoiseState *l, size_t pos) { - return pos + GetUtf8(l->buf + pos, l->len - pos).n; -} - -static size_t Backward(struct linenoiseState *l, size_t pos) { - if (pos) { - do --pos; - while (pos && (l->buf[pos] & 0300) == 0200); - } - return pos; -} - static size_t Backwards(struct linenoiseState *l, size_t pos, int pred(wint_t)) { size_t i; @@ -1296,6 +1450,28 @@ static void linenoiseEditRightWord(struct linenoiseState *l) { linenoiseRefreshLine(l); } +static void linenoiseEditLeftExpr(struct linenoiseState *l) { + unsigned mark[2]; + l->pos = Backwards(l, l->pos, isxseparator); + if (!linenoiseMirrorLeft(l, mark)) { + l->pos = mark[0]; + } else { + l->pos = Backwards(l, l->pos, notwseparator); + } + linenoiseRefreshLine(l); +} + +static void linenoiseEditRightExpr(struct linenoiseState *l) { + unsigned mark[2]; + l->pos = Forwards(l, l->pos, isxseparator); + if (!linenoiseMirrorRight(l, mark)) { + l->pos = Forward(l, mark[1]); + } else { + l->pos = Forwards(l, l->pos, notwseparator); + } + linenoiseRefreshLine(l); +} + static void linenoiseEditDelete(struct linenoiseState *l) { size_t i; if (l->pos == l->len) return; @@ -1484,6 +1660,76 @@ static void linenoiseEditGoto(struct linenoiseState *l) { linenoiseRefreshLine(l); 
} +static size_t linenoiseEscape(char *d, const char *s, size_t n) { + char *p; + size_t i; + unsigned c, w, l; + for (p = d, l = i = 0; i < n; ++i) { + switch ((c = s[i] & 255)) { + CASE('\a', w = READ16LE("\\a")); + CASE('\b', w = READ16LE("\\b")); + CASE('\t', w = READ16LE("\\t")); + CASE('\n', w = READ16LE("\\n")); + CASE('\v', w = READ16LE("\\v")); + CASE('\f', w = READ16LE("\\f")); + CASE('\r', w = READ16LE("\\r")); + CASE('"', w = READ16LE("\\\"")); + CASE('\'', w = READ16LE("\\\'")); + CASE('\\', w = READ16LE("\\\\")); + default: + if ((0x00 <= c && c <= 0x1F) || c == 0x7F || (c == '?' && l == '?')) { + w = READ16LE("\\x"); + w |= "0123456789abcdef"[(c & 0xF0) >> 4] << 020; + w |= "0123456789abcdef"[(c & 0x0F) >> 0] << 030; + } else { + w = c; + } + break; + } + WRITE32LE(p, w); + p += (bsr(w) >> 3) + 1; + l = w; + } + return p - d; +} + +static void linenoiseEditInsertEscape(struct linenoiseState *l) { + size_t m; + ssize_t n; + char seq[16]; + char esc[sizeof(seq) * 4]; + if ((n = linenoiseRead(l->ifd, seq, sizeof(seq), l)) > 0) { + m = linenoiseEscape(esc, seq, n); + linenoiseEditInsert(l, esc, m); + } +} + +static void linenoiseEditInterrupt(struct linenoiseState *l) { + gotint = SIGINT; +} + +static void linenoiseEditQuit(struct linenoiseState *l) { + gotint = SIGQUIT; +} + +static void linenoiseEditSuspend(struct linenoiseState *l) { + raise(SIGSTOP); +} + +static void linenoiseEditPause(struct linenoiseState *l) { + tcflow(l->ofd, TCOOFF); + ispaused = 1; +} + +static void linenoiseEditCtrlq(struct linenoiseState *l) { + if (ispaused) { + linenoiseUnpause(l->ofd); + linenoiseRefreshLineForce(l); + } else { + linenoiseEditInsertEscape(l); + } +} + /** * Runs linenoise engine. 
* @@ -1502,7 +1748,6 @@ static ssize_t linenoiseEdit(int stdin_fd, int stdout_fd, const char *prompt, size_t nread; char *p, seq[16]; struct linenoiseState l; - linenoiseHintsCallback *hc; bzero(&l, sizeof(l)); if (!(l.buf = malloc((l.buflen = 32)))) return -1; l.buf[0] = 0; @@ -1512,7 +1757,7 @@ static ssize_t linenoiseEdit(int stdin_fd, int stdout_fd, const char *prompt, l.ws = GetTerminalSize(l.ws, l.ifd, l.ofd); linenoiseHistoryAdd(""); linenoiseWriteStr(l.ofd, l.prompt); - while (1) { + for (;;) { if (l.dirty) linenoiseRefreshLineForce(&l); rc = linenoiseRead(l.ifd, seq, sizeof(seq), &l); if (rc > 0) { @@ -1531,8 +1776,7 @@ static ssize_t linenoiseEdit(int stdin_fd, int stdout_fd, const char *prompt, seq[0] = '\r'; seq[1] = 0; } else { - historylen--; - free(history[historylen]); + free(history[--historylen]); history[historylen] = 0; free(l.buf); return -1; @@ -1545,11 +1789,16 @@ static ssize_t linenoiseEdit(int stdin_fd, int stdout_fd, const char *prompt, CASE(CTRL('B'), linenoiseEditLeft(&l)); CASE(CTRL('@'), linenoiseEditMark(&l)); CASE(CTRL('Y'), linenoiseEditYank(&l)); + CASE(CTRL('Q'), linenoiseEditCtrlq(&l)); CASE(CTRL('F'), linenoiseEditRight(&l)); + CASE(CTRL('\\'), linenoiseEditQuit(&l)); + CASE(CTRL('S'), linenoiseEditPause(&l)); CASE(CTRL('?'), linenoiseEditRubout(&l)); CASE(CTRL('H'), linenoiseEditRubout(&l)); CASE(CTRL('L'), linenoiseEditRefresh(&l)); + CASE(CTRL('Z'), linenoiseEditSuspend(&l)); CASE(CTRL('U'), linenoiseEditKillLeft(&l)); + CASE(CTRL('C'), linenoiseEditInterrupt(&l)); CASE(CTRL('T'), linenoiseEditTranspose(&l)); CASE(CTRL('K'), linenoiseEditKillRight(&l)); CASE(CTRL('W'), linenoiseEditRuboutWord(&l)); @@ -1569,13 +1818,11 @@ static ssize_t linenoiseEdit(int stdin_fd, int stdout_fd, const char *prompt, } break; case '\r': + l.final = 1; free(history[--historylen]); history[historylen] = 0; linenoiseEditEnd(&l); - hc = hintsCallback; - hintsCallback = 0; linenoiseRefreshLineForce(&l); - hintsCallback = hc; if ((p = realloc(l.buf, 
l.len + 1))) l.buf = p; *obuf = l.buf; return l.len; @@ -1594,6 +1841,8 @@ static ssize_t linenoiseEdit(int stdin_fd, int stdout_fd, const char *prompt, CASE('u', linenoiseEditUppercaseWord(&l)); CASE('c', linenoiseEditCapitalizeWord(&l)); CASE('t', linenoiseEditTransposeWords(&l)); + CASE(CTRL('B'), linenoiseEditLeftExpr(&l)); + CASE(CTRL('F'), linenoiseEditRightExpr(&l)); CASE(CTRL('H'), linenoiseEditRuboutWord(&l)); case '[': if (nread < 3) break; @@ -1634,6 +1883,31 @@ static ssize_t linenoiseEdit(int stdin_fd, int stdout_fd, const char *prompt, break; } break; + case 033: + if (nread < 3) break; + switch (seq[2]) { + case '[': + if (nread < 4) break; + switch (seq[3]) { + CASE('C', linenoiseEditRightExpr(&l)); /* \e\e[C alt-right */ + CASE('D', linenoiseEditLeftExpr(&l)); /* \e\e[D alt-left */ + default: + break; + } + break; + case 'O': + if (nread < 4) break; + switch (seq[3]) { + CASE('C', linenoiseEditRightExpr(&l)); /* \e\eOC alt-right */ + CASE('D', linenoiseEditLeftExpr(&l)); /* \e\eOD alt-left */ + default: + break; + } + break; + default: + break; + } + break; default: break; } diff --git a/third_party/python/Include/ceval.h b/third_party/python/Include/ceval.h index fea339d07..61721bdb3 100644 --- a/third_party/python/Include/ceval.h +++ b/third_party/python/Include/ceval.h @@ -2,7 +2,6 @@ #define Py_CEVAL_H #include "libc/bits/likely.h" #include "libc/dce.h" -#include "libc/log/libfatal.internal.h" #include "third_party/python/Include/object.h" #include "third_party/python/Include/pyerrors.h" #include "third_party/python/Include/pystate.h" @@ -102,31 +101,34 @@ int Py_GetRecursionLimit(void); #define _Py_MakeEndRecCheck(x) \ (--(x) < _Py_RecursionLimitLowerWaterMark(_Py_CheckRecursionLimit)) -int _Py_CheckRecursiveCall(const char *); +int Py_EnterRecursiveCall(const char *); +void Py_LeaveRecursiveCall(void); + +#ifndef Py_LIMITED_API extern int _Py_CheckRecursionLimit; - -forceinline int Py_EnterRecursiveCall(const char *where) { - const char *rsp, 
*bot; - if (!IsTiny()) { - if (IsModeDbg()) { - PyThreadState_GET()->recursion_depth++; - return _Py_CheckRecursiveCall(where); - } else { - rsp = __builtin_frame_address(0); - asm(".weak\tape_stack_vaddr\n\t" - "movabs\t$ape_stack_vaddr+32768,%0" : "=r"(bot)); - if (UNLIKELY(rsp < bot)) { - PyErr_Format(PyExc_MemoryError, "Stack overflow%s", where); - return -1; - } - } - } - return 0; -} - -forceinline void Py_LeaveRecursiveCall(void) { - PyThreadState_GET()->recursion_depth--; -} +int _Py_CheckRecursiveCall(const char *); +#define Py_LeaveRecursiveCall() PyThreadState_GET()->recursion_depth-- +#define Py_EnterRecursiveCall(where) \ + ({ \ + int rc = 0; \ + const char *rsp, *bot; \ + if (!IsTiny()) { \ + if (IsModeDbg()) { \ + PyThreadState_GET()->recursion_depth++; \ + rc = _Py_CheckRecursiveCall(where); \ + } else { \ + rsp = __builtin_frame_address(0); \ + asm(".weak\tape_stack_vaddr\n\t" \ + "movabs\t$ape_stack_vaddr+32768,%0" : "=r"(bot)); \ + if (UNLIKELY(rsp < bot)) { \ + PyErr_Format(PyExc_MemoryError, "Stack overflow%s", where); \ + rc = -1; \ + } \ + } \ + } \ + rc; \ + }) +#endif #define Py_ALLOW_RECURSION \ do { \ diff --git a/third_party/python/Include/pyctype.h b/third_party/python/Include/pyctype.h index 15f8f3923..05ea304b3 100644 --- a/third_party/python/Include/pyctype.h +++ b/third_party/python/Include/pyctype.h @@ -6,36 +6,50 @@ #define Py_TOLOWER(c) kToLower[255 & (c)] #define Py_TOUPPER(c) kToUpper[255 & (c)] -forceinline bool Py_ISDIGIT(unsigned char c) { - return '0' <= c && c <= '9'; -} +#define Py_ISDIGIT(C) \ + ({ \ + unsigned char c_ = (C); \ + '0' <= c_&& c_ <= '9'; \ + }) -forceinline bool Py_ISLOWER(unsigned char c) { - return 'a' <= c && c <= 'z'; -} +#define Py_ISLOWER(C) \ + ({ \ + unsigned char c_ = (C); \ + 'a' <= c_&& c_ <= 'z'; \ + }) -forceinline bool Py_ISUPPER(unsigned char c) { - return 'A' <= c && c <= 'Z'; -} +#define Py_ISUPPER(C) \ + ({ \ + unsigned char c_ = (C); \ + 'A' <= c_&& c_ <= 'Z'; \ + }) -forceinline bool 
Py_ISALPHA(unsigned char c) { - return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'); -} +#define Py_ISALPHA(C) \ + ({ \ + unsigned char c_ = (C); \ + ('A' <= c_ && c_ <= 'Z') || ('a' <= c_ && c_ <= 'z'); \ + }) -forceinline bool Py_ISALNUM(unsigned char c) { - return ('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || - ('a' <= c && c <= 'z'); -} +#define Py_ISALNUM(C) \ + ({ \ + unsigned char c_ = (C); \ + (('0' <= c_ && c_ <= '9') || ('A' <= c_ && c_ <= 'Z') || \ + ('a' <= c_ && c_ <= 'z')); \ + }) -forceinline bool Py_ISSPACE(unsigned char c) { - return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f' || - c == '\v'; -} +#define Py_ISSPACE(C) \ + ({ \ + unsigned char c_ = (C); \ + (c_ == ' ' || c_ == '\t' || c_ == '\r' || c_ == '\n' || c_ == '\f' || \ + c_ == '\v'); \ + }) -forceinline bool Py_ISXDIGIT(unsigned char c) { - return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || - ('a' <= c && c <= 'f'); -} +#define Py_ISXDIGIT(C) \ + ({ \ + unsigned char c_ = (C); \ + (('0' <= c_ && c_ <= '9') || ('A' <= c_ && c_ <= 'F') || \ + ('a' <= c_ && c_ <= 'f')); \ + }) #endif /* !PYCTYPE_H */ #endif /* !Py_LIMITED_API */ diff --git a/third_party/python/Lib/test/test_class.py b/third_party/python/Lib/test/test_class.py index b98b4d176..b6a3eb4bf 100644 --- a/third_party/python/Lib/test/test_class.py +++ b/third_party/python/Lib/test/test_class.py @@ -490,6 +490,7 @@ class ClassTests(unittest.TestCase): self.assertRaises(TypeError, hash, C2()) + @unittest.skipIf(cosmo.MODE == 'tiny', "no stack awareness in tiny mode") def testSFBug532646(self): # Test for SF bug 532646 diff --git a/third_party/python/Lib/test/test_codecs.py b/third_party/python/Lib/test/test_codecs.py index 926922a73..470840057 100644 --- a/third_party/python/Lib/test/test_codecs.py +++ b/third_party/python/Lib/test/test_codecs.py @@ -1331,7 +1331,7 @@ class EscapeDecodeTest(unittest.TestCase): check(br"[\x410]", b"[A0]") for i in range(97, 123): b = bytes([i]) - if b not in 
b'abfnrtvx': + if b not in b'abfnrtvxe': # [jart] support \e with self.assertWarns(DeprecationWarning): check(b"\\" + b, b"\\" + b) with self.assertWarns(DeprecationWarning): @@ -2603,7 +2603,7 @@ class UnicodeEscapeTest(unittest.TestCase): check(br"\U0001d120", "\U0001d120") for i in range(97, 123): b = bytes([i]) - if b not in b'abfnrtuvx': + if b not in b'abfnrtuvxe': # [jart] support \e with self.assertWarns(DeprecationWarning): check(b"\\" + b, "\\" + chr(i)) if b.upper() not in b'UN': diff --git a/third_party/python/Lib/test/test_compile.py b/third_party/python/Lib/test/test_compile.py index a3d01cc12..3a8df2d78 100644 --- a/third_party/python/Lib/test/test_compile.py +++ b/third_party/python/Lib/test/test_compile.py @@ -163,8 +163,12 @@ if 1: for arg in ["077787", "0xj", "0x.", "0e", "090000000000000", "080000000000000", "000000000000009", "000000000000008", "0b42", "0BADCAFE", "0o123456789", "0b1.1", "0o4.2", - "0b101j2", "0o153j2", "0b100e1", "0o777e1", "0777", - "000777", "000000000000007"]: + "0b101j2", "0o153j2", "0b100e1", "0o777e1", + # [jart] restore octal + # "0777", + # "000777", + # "000000000000007", + ]: self.assertRaises(SyntaxError, eval, arg) self.assertEqual(eval("0xff"), 255) diff --git a/third_party/python/Lib/test/test_fileio.py b/third_party/python/Lib/test/test_fileio.py index 2fcedfeb9..788cc8723 100644 --- a/third_party/python/Lib/test/test_fileio.py +++ b/third_party/python/Lib/test/test_fileio.py @@ -177,7 +177,7 @@ class AutoFileTests: finally: os.close(fd) - # @unittest.skipUnless(cosmo.MODE == "dbg", "disabled recursion checking") + @unittest.skipIf(cosmo.MODE == 'tiny', "no stack awareness in tiny mode") def testRecursiveRepr(self): # Issue #25455 with swap_attr(self.f, 'name', self.f): diff --git a/third_party/python/Lib/test/test_plistlib.py b/third_party/python/Lib/test/test_plistlib.py index d3473727c..9655615fa 100644 --- a/third_party/python/Lib/test/test_plistlib.py +++ b/third_party/python/Lib/test/test_plistlib.py @@ 
-814,6 +814,7 @@ class TestBinaryPlistlib(unittest.TestCase): b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY)) self.assertIs(b['x'], b) + @unittest.skipIf(cosmo.MODE == 'tiny', "no stack awareness in tiny mode") def test_deep_nesting(self): for N in [300, 100000]: chunks = [b'\xa1' + (i + 1).to_bytes(4, 'big') for i in range(N)] diff --git a/third_party/python/Lib/test/test_scratch.py b/third_party/python/Lib/test/test_scratch.py index e0d692311..ca96683fb 100644 --- a/third_party/python/Lib/test/test_scratch.py +++ b/third_party/python/Lib/test/test_scratch.py @@ -8,10 +8,10 @@ exit1 = cosmo.exit1 class BooTest(unittest.TestCase): def test_boo(self): - pass # cosmo.ftrace() - # chr(33) + # eval('0') # exit1() + pass if __name__ == '__main__': unittest.main() diff --git a/third_party/python/Modules/_decimal/_decimal.c b/third_party/python/Modules/_decimal/_decimal.c index 85f9ffdce..2771f9ed7 100644 --- a/third_party/python/Modules/_decimal/_decimal.c +++ b/third_party/python/Modules/_decimal/_decimal.c @@ -28,7 +28,6 @@ │ EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/fmt/fmt.h" -#include "libc/log/libfatal.internal.h" #include "third_party/python/Include/abstract.h" #include "third_party/python/Include/boolobject.h" #include "third_party/python/Include/complexobject.h" diff --git a/third_party/python/Modules/_decimal/libmpdec/umodarith.h b/third_party/python/Modules/_decimal/libmpdec/umodarith.h index 214822a19..5f55c90fe 100644 --- a/third_party/python/Modules/_decimal/libmpdec/umodarith.h +++ b/third_party/python/Modules/_decimal/libmpdec/umodarith.h @@ -1,6 +1,5 @@ #ifndef UMODARITH_H #define UMODARITH_H -#include "libc/log/libfatal.internal.h" #include "third_party/python/Modules/_decimal/libmpdec/constants.h" #include "third_party/python/Modules/_decimal/libmpdec/mpdecimal.h" #include "third_party/python/Modules/_decimal/libmpdec/typearith.h" diff --git a/third_party/python/Modules/_hashmbedtls.c b/third_party/python/Modules/_hashmbedtls.c index ec8c7b7f7..31d3149bb 100644 --- a/third_party/python/Modules/_hashmbedtls.c +++ b/third_party/python/Modules/_hashmbedtls.c @@ -18,7 +18,6 @@ #define PY_SSIZE_T_CLEAN #include "libc/calls/calls.h" #include "libc/log/backtrace.internal.h" -#include "libc/log/libfatal.internal.h" #include "libc/macros.internal.h" #include "libc/runtime/runtime.h" #include "libc/stdio/stdio.h" @@ -27,7 +26,6 @@ #include "third_party/mbedtls/md.h" #include "third_party/mbedtls/pkcs5.h" #include "third_party/python/Include/Python.h" -#include "third_party/python/Include/ezprint.h" #include "third_party/python/Include/import.h" #include "third_party/python/Include/object.h" #include "third_party/python/Include/pyerrors.h" diff --git a/third_party/python/Modules/_testcapimodule.c b/third_party/python/Modules/_testcapimodule.c index 4fb27e192..8761a8038 100644 --- a/third_party/python/Modules/_testcapimodule.c +++ b/third_party/python/Modules/_testcapimodule.c @@ -1629,7 +1629,7 @@ getargs_u(PyObject *self, 
PyObject *args) Py_ssize_t size; if (!PyArg_ParseTuple(args, "u", &str)) return NULL; - size = Py_UNICODE_strlen(str); + size = wcslen(str); return PyUnicode_FromUnicode(str, size); } @@ -1651,7 +1651,7 @@ getargs_Z(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "Z", &str)) return NULL; if (str != NULL) { - size = Py_UNICODE_strlen(str); + size = wcslen(str); return PyUnicode_FromUnicode(str, size); } else Py_RETURN_NONE; diff --git a/third_party/python/Modules/audioop.c b/third_party/python/Modules/audioop.c index 78d73d182..107bf7b2e 100644 --- a/third_party/python/Modules/audioop.c +++ b/third_party/python/Modules/audioop.c @@ -849,7 +849,34 @@ audioop_add_impl(PyObject *module, Py_buffer *fragment1, if (rv == NULL) return NULL; ncp = (signed char *)PyBytes_AsString(rv); - for (i = 0; i < fragment1->len; i += width) { + i = 0; +#if defined(__GNUC__) && defined(__SSE2__) + /* [jart] make audio mixing 20x faster */ + if (width == 2) { + for (; i + 16 <= fragment1->len; i += 16) { + asm("movups\t%1,%%xmm0\n\t" + "movups\t%2,%%xmm1\n\t" + "paddsw\t%%xmm1,%%xmm0\n\t" + "movups\t%%xmm0,%0" + : "=m"(*(char(*)[16])(ncp + i)) + : "m"(*(char(*)[16])((char *)fragment1->buf + i)), + "m"(*(char(*)[16])((char *)fragment2->buf + i)) + : "xmm0", "xmm1"); + } + } else if (width == 1) { + for (; i + 16 <= fragment1->len; i += 16) { + asm("movups\t%1,%%xmm0\n\t" + "movups\t%2,%%xmm1\n\t" + "paddsb\t%%xmm1,%%xmm0\n\t" + "movups\t%%xmm0,%0" + : "=m"(*(char(*)[16])(ncp + i)) + : "m"(*(char(*)[16])((char *)fragment1->buf + i)), + "m"(*(char(*)[16])((char *)fragment2->buf + i)) + : "xmm0", "xmm1"); + } + } +#endif + for (; i < fragment1->len; i += width) { int val1 = GETRAWSAMPLE(width, fragment1->buf, i); int val2 = GETRAWSAMPLE(width, fragment2->buf, i); if (width < 4) { diff --git a/third_party/python/Modules/tlsmodule.c b/third_party/python/Modules/tlsmodule.c index 78349778c..58afc1c87 100644 --- a/third_party/python/Modules/tlsmodule.c +++ 
b/third_party/python/Modules/tlsmodule.c @@ -19,7 +19,6 @@ #include "libc/assert.h" #include "libc/calls/calls.h" #include "libc/errno.h" -#include "libc/log/libfatal.internal.h" #include "libc/macros.internal.h" #include "libc/runtime/gc.internal.h" #include "libc/str/str.h" diff --git a/third_party/python/Objects/bytesobject.c b/third_party/python/Objects/bytesobject.c index 0afbc18a6..38bdbbbc6 100644 --- a/third_party/python/Objects/bytesobject.c +++ b/third_party/python/Objects/bytesobject.c @@ -1193,6 +1193,7 @@ PyObject *_PyBytes_DecodeEscape(const char *s, case 'n': *p++ = '\n'; break; case 'r': *p++ = '\r'; break; case 'v': *p++ = '\013'; break; /* VT */ + case 'e': *p++ = '\033'; break; /* [jart] ansi escape */ case 'a': *p++ = '\007'; break; /* BEL, not classic C */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': diff --git a/third_party/python/Objects/unicodeobject-deadcode.c b/third_party/python/Objects/unicodeobject-deadcode.c new file mode 100644 index 000000000..9d6b662bf --- /dev/null +++ b/third_party/python/Objects/unicodeobject-deadcode.c @@ -0,0 +1,431 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Python 3 │ +│ https://docs.python.org/3/license.html │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#define PY_SSIZE_T_CLEAN +#include "libc/assert.h" +#include "third_party/python/Include/codecs.h" +#include "third_party/python/Include/pyerrors.h" +#include "third_party/python/Include/pymem.h" +#include "third_party/python/Include/unicodeobject.h" +#include "third_party/python/Include/warnings.h" +/* clang-format off */ + +#define _PyUnicode_STATE(op) \ + (((PyASCIIObject *)(op))->state) + +int ensure_unicode(PyObject *); +PyObject *unicode_result(PyObject *); +int unicode_check_modifiable(PyObject *); 
+PyObject *unicode_encode_ucs1(PyObject *, const char *, const Py_UCS4); +PyObject *_PyUnicode_TranslateCharmap(PyObject *, PyObject *, const char *); + +/* The max unicode value is always 0x10FFFF while using the PEP-393 API. + This function is kept for backward compatibility with the old API. */ +Py_UNICODE +PyUnicode_GetMax(void) +{ +#ifdef Py_UNICODE_WIDE + return 0x10FFFF; +#else + /* This is actually an illegal character, so it should + not be passed to unichr. */ + return 0xFFFF; +#endif +} + +PyObject * +PyUnicode_AsDecodedObject(PyObject *unicode, + const char *encoding, + const char *errors) +{ + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return NULL; + } + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "PyUnicode_AsDecodedObject() is deprecated; " + "use PyCodec_Decode() to decode from str", 1) < 0) + return NULL; + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + /* Decode via the codec registry */ + return PyCodec_Decode(unicode, encoding, errors); +} + +PyObject * +PyUnicode_AsDecodedUnicode(PyObject *unicode, + const char *encoding, + const char *errors) +{ + PyObject *v; + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "PyUnicode_AsDecodedUnicode() is deprecated; " + "use PyCodec_Decode() to decode from str to str", 1) < 0) + return NULL; + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + /* Decode via the codec registry */ + v = PyCodec_Decode(unicode, encoding, errors); + if (v == NULL) + goto onError; + if (!PyUnicode_Check(v)) { + PyErr_Format(PyExc_TypeError, + "'%.400s' decoder returned '%.400s' instead of 'str'; " + "use codecs.decode() to decode to arbitrary types", + encoding, + Py_TYPE(unicode)->tp_name); + Py_DECREF(v); + goto onError; + } + return unicode_result(v); + onError: + return NULL; +} + +PyObject * +PyUnicode_AsEncodedObject(PyObject *unicode, + const char *encoding, + const char *errors) +{ + 
PyObject *v; + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "PyUnicode_AsEncodedObject() is deprecated; " + "use PyUnicode_AsEncodedString() to encode from str to bytes " + "or PyCodec_Encode() for generic encoding", 1) < 0) + return NULL; + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + /* Encode via the codec registry */ + v = PyCodec_Encode(unicode, encoding, errors); + if (v == NULL) + goto onError; + return v; + onError: + return NULL; +} + +PyObject * +PyUnicode_AsEncodedUnicode(PyObject *unicode, + const char *encoding, + const char *errors) +{ + PyObject *v; + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "PyUnicode_AsEncodedUnicode() is deprecated; " + "use PyCodec_Encode() to encode from str to str", 1) < 0) + return NULL; + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + /* Encode via the codec registry */ + v = PyCodec_Encode(unicode, encoding, errors); + if (v == NULL) + goto onError; + if (!PyUnicode_Check(v)) { + PyErr_Format(PyExc_TypeError, + "'%.400s' encoder returned '%.400s' instead of 'str'; " + "use codecs.encode() to encode to arbitrary types", + encoding, + Py_TYPE(v)->tp_name); + Py_DECREF(v); + goto onError; + } + return v; + onError: + return NULL; +} + +wchar_t * +_PyUnicode_AsWideCharString(PyObject *unicode) +{ + const wchar_t *wstr; + wchar_t *buffer; + Py_ssize_t buflen; + if (unicode == NULL) { + PyErr_BadInternalCall(); + return NULL; + } + wstr = PyUnicode_AsUnicodeAndSize(unicode, &buflen); + if (wstr == NULL) { + return NULL; + } + if (wcslen(wstr) != (size_t)buflen) { + PyErr_SetString(PyExc_ValueError, + "embedded null character"); + return NULL; + } + buffer = PyMem_NEW(wchar_t, buflen + 1); + if (buffer == NULL) { + PyErr_NoMemory(); + return NULL; + } + memcpy(buffer, wstr, (buflen + 1) * sizeof(wchar_t)); + return buffer; +} + 
+const Py_UNICODE * +_PyUnicode_AsUnicode(PyObject *unicode) +{ + Py_ssize_t size; + const Py_UNICODE *wstr; + wstr = PyUnicode_AsUnicodeAndSize(unicode, &size); + if (wstr && wcslen(wstr) != (size_t)size) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + return NULL; + } + return wstr; +} + +Py_ssize_t +PyUnicode_GetSize(PyObject *unicode) +{ + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + goto onError; + } + return PyUnicode_GET_SIZE(unicode); + onError: + return -1; +} + +int +PyUnicode_WriteChar(PyObject *unicode, Py_ssize_t index, Py_UCS4 ch) +{ + if (!PyUnicode_Check(unicode) || !PyUnicode_IS_COMPACT(unicode)) { + PyErr_BadArgument(); + return -1; + } + assert(PyUnicode_IS_READY(unicode)); + if (index < 0 || index >= PyUnicode_GET_LENGTH(unicode)) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return -1; + } + if (unicode_check_modifiable(unicode)) + return -1; + if (ch > PyUnicode_MAX_CHAR_VALUE(unicode)) { + PyErr_SetString(PyExc_ValueError, "character out of range"); + return -1; + } + PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode), + index, ch); + return 0; +} + +/* Deprecated */ +PyObject * +PyUnicode_EncodeLatin1(const Py_UNICODE *p, + Py_ssize_t size, + const char *errors) +{ + PyObject *result; + PyObject *unicode = PyUnicode_FromUnicode(p, size); + if (unicode == NULL) + return NULL; + result = unicode_encode_ucs1(unicode, errors, 256); + Py_DECREF(unicode); + return result; +} + +/* Deprecated */ +PyObject * +PyUnicode_EncodeASCII(const Py_UNICODE *p, + Py_ssize_t size, + const char *errors) +{ + PyObject *result; + PyObject *unicode = PyUnicode_FromUnicode(p, size); + if (unicode == NULL) + return NULL; + result = unicode_encode_ucs1(unicode, errors, 128); + Py_DECREF(unicode); + return result; +} + +PyObject * +PyUnicode_Encode(const Py_UNICODE *s, + Py_ssize_t size, + const char *encoding, + const char *errors) +{ + PyObject *v, *unicode; + unicode = PyUnicode_FromUnicode(s, 
size); + if (unicode == NULL) + return NULL; + v = PyUnicode_AsEncodedString(unicode, encoding, errors); + Py_DECREF(unicode); + return v; +} + +/* Deprecated */ +PyObject * +PyUnicode_EncodeCharmap(const Py_UNICODE *p, + Py_ssize_t size, + PyObject *mapping, + const char *errors) +{ + PyObject *result; + PyObject *unicode = PyUnicode_FromUnicode(p, size); + if (unicode == NULL) + return NULL; + result = _PyUnicode_EncodeCharmap(unicode, mapping, errors); + Py_DECREF(unicode); + return result; +} + +/* Deprecated. Use PyUnicode_Translate instead. */ +PyObject * +PyUnicode_TranslateCharmap(const Py_UNICODE *p, + Py_ssize_t size, + PyObject *mapping, + const char *errors) +{ + PyObject *result; + PyObject *unicode = PyUnicode_FromUnicode(p, size); + if (!unicode) + return NULL; + result = _PyUnicode_TranslateCharmap(unicode, mapping, errors); + Py_DECREF(unicode); + return result; +} + +void +PyUnicode_InternImmortal(PyObject **p) +{ + PyUnicode_InternInPlace(p); + if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { + _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL; + Py_INCREF(*p); + } +} + +Py_UNICODE* +Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2) +{ + Py_UNICODE *u = s1; + while ((*u++ = *s2++)); + return s1; +} + +Py_UNICODE* +Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) +{ + Py_UNICODE *u = s1; + while ((*u++ = *s2++)) + if (n-- == 0) + break; + return s1; +} + +Py_UNICODE* +Py_UNICODE_strcat(Py_UNICODE *s1, const Py_UNICODE *s2) +{ + Py_UNICODE *u1 = s1; + u1 += Py_UNICODE_strlen(u1); + Py_UNICODE_strcpy(u1, s2); + return s1; +} + +int +Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2) +{ + while (*s1 && *s2 && *s1 == *s2) + s1++, s2++; + if (*s1 && *s2) + return (*s1 < *s2) ? 
-1 : +1; + if (*s1) + return 1; + if (*s2) + return -1; + return 0; +} + +int +Py_UNICODE_strncmp(const Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) +{ + Py_UNICODE u1, u2; + for (; n != 0; n--) { + u1 = *s1; + u2 = *s2; + if (u1 != u2) + return (u1 < u2) ? -1 : +1; + if (u1 == '\0') + return 0; + s1++; + s2++; + } + return 0; +} + +Py_UNICODE* +Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c) +{ + const Py_UNICODE *p; + for (p = s; *p; p++) + if (*p == c) + return (Py_UNICODE*)p; + return NULL; +} + +Py_UNICODE* +Py_UNICODE_strrchr(const Py_UNICODE *s, Py_UNICODE c) +{ + const Py_UNICODE *p; + p = s + Py_UNICODE_strlen(s); + while (p != s) { + p--; + if (*p == c) + return (Py_UNICODE*)p; + } + return NULL; +} + +size_t +Py_UNICODE_strlen(const Py_UNICODE *u) +{ + int res = 0; + while(*u++) + res++; + return res; +} + +Py_UNICODE* +PyUnicode_AsUnicodeCopy(PyObject *unicode) +{ + Py_UNICODE *u, *copy; + Py_ssize_t len, size; + if (!PyUnicode_Check(unicode)) { + PyErr_BadArgument(); + return NULL; + } + u = PyUnicode_AsUnicodeAndSize(unicode, &len); + if (u == NULL) + return NULL; + /* Ensure we won't overflow the size. */ + if (len > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { + PyErr_NoMemory(); + return NULL; + } + size = len + 1; /* copy the null character */ + size *= sizeof(Py_UNICODE); + copy = PyMem_Malloc(size); + if (copy == NULL) { + PyErr_NoMemory(); + return NULL; + } + memcpy(copy, u, size); + return copy; +} diff --git a/third_party/python/Objects/unicodeobject.c b/third_party/python/Objects/unicodeobject.c index a7e56547d..b9efe7063 100644 --- a/third_party/python/Objects/unicodeobject.c +++ b/third_party/python/Objects/unicodeobject.c @@ -447,20 +447,6 @@ get_error_handler(const char *errors) return _Py_ERROR_OTHER; } -/* The max unicode value is always 0x10FFFF while using the PEP-393 API. - This function is kept for backward compatibility with the old API. 
*/ -Py_UNICODE -PyUnicode_GetMax(void) -{ -#ifdef Py_UNICODE_WIDE - return 0x10FFFF; -#else - /* This is actually an illegal character, so it should - not be passed to unichr. */ - return 0xFFFF; -#endif -} - #ifdef Py_DEBUG int _PyUnicode_CheckConsistency(PyObject *op, int check_content) @@ -616,7 +602,6 @@ static PyObject* unicode_result_ready(PyObject *unicode) { Py_ssize_t length; - length = PyUnicode_GET_LENGTH(unicode); if (length == 0) { if (unicode != unicode_empty) { @@ -625,7 +610,6 @@ unicode_result_ready(PyObject *unicode) } return unicode_empty; } - if (length == 1) { void *data = PyUnicode_DATA(unicode); int kind = PyUnicode_KIND(unicode); @@ -647,12 +631,11 @@ unicode_result_ready(PyObject *unicode) } } } - assert(_PyUnicode_CheckConsistency(unicode, 1)); return unicode; } -static PyObject* +PyObject* unicode_result(PyObject *unicode) { assert(_PyUnicode_CHECK(unicode)); @@ -1471,7 +1454,7 @@ unicode_convert_wchar_to_ucs4(const wchar_t *begin, const wchar_t *end, } #endif -static int +int unicode_check_modifiable(PyObject *unicode) { if (!unicode_modifiable(unicode)) { @@ -3176,37 +3159,6 @@ PyUnicode_AsWideCharString(PyObject *unicode, return buffer; } -wchar_t* -_PyUnicode_AsWideCharString(PyObject *unicode) -{ - const wchar_t *wstr; - wchar_t *buffer; - Py_ssize_t buflen; - - if (unicode == NULL) { - PyErr_BadInternalCall(); - return NULL; - } - - wstr = PyUnicode_AsUnicodeAndSize(unicode, &buflen); - if (wstr == NULL) { - return NULL; - } - if (wcslen(wstr) != (size_t)buflen) { - PyErr_SetString(PyExc_ValueError, - "embedded null character"); - return NULL; - } - - buffer = PyMem_NEW(wchar_t, buflen + 1); - if (buffer == NULL) { - PyErr_NoMemory(); - return NULL; - } - memcpy(buffer, wstr, (buflen + 1) * sizeof(wchar_t)); - return buffer; -} - PyObject * PyUnicode_FromOrdinal(int ordinal) { @@ -3409,113 +3361,6 @@ PyUnicode_Decode(const char *s, return NULL; } -PyObject * -PyUnicode_AsDecodedObject(PyObject *unicode, - const char *encoding, - 
const char *errors) -{ - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsDecodedObject() is deprecated; " - "use PyCodec_Decode() to decode from str", 1) < 0) - return NULL; - - if (encoding == NULL) - encoding = PyUnicode_GetDefaultEncoding(); - - /* Decode via the codec registry */ - return PyCodec_Decode(unicode, encoding, errors); -} - -PyObject * -PyUnicode_AsDecodedUnicode(PyObject *unicode, - const char *encoding, - const char *errors) -{ - PyObject *v; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - goto onError; - } - - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsDecodedUnicode() is deprecated; " - "use PyCodec_Decode() to decode from str to str", 1) < 0) - return NULL; - - if (encoding == NULL) - encoding = PyUnicode_GetDefaultEncoding(); - - /* Decode via the codec registry */ - v = PyCodec_Decode(unicode, encoding, errors); - if (v == NULL) - goto onError; - if (!PyUnicode_Check(v)) { - PyErr_Format(PyExc_TypeError, - "'%.400s' decoder returned '%.400s' instead of 'str'; " - "use codecs.decode() to decode to arbitrary types", - encoding, - Py_TYPE(unicode)->tp_name); - Py_DECREF(v); - goto onError; - } - return unicode_result(v); - - onError: - return NULL; -} - -PyObject * -PyUnicode_Encode(const Py_UNICODE *s, - Py_ssize_t size, - const char *encoding, - const char *errors) -{ - PyObject *v, *unicode; - unicode = PyUnicode_FromUnicode(s, size); - if (unicode == NULL) - return NULL; - v = PyUnicode_AsEncodedString(unicode, encoding, errors); - Py_DECREF(unicode); - return v; -} - -PyObject * -PyUnicode_AsEncodedObject(PyObject *unicode, - const char *encoding, - const char *errors) -{ - PyObject *v; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - goto onError; - } - - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsEncodedObject() is deprecated; " - "use PyUnicode_AsEncodedString() to encode from str to bytes " - 
"or PyCodec_Encode() for generic encoding", 1) < 0) - return NULL; - - if (encoding == NULL) - encoding = PyUnicode_GetDefaultEncoding(); - - /* Encode via the codec registry */ - v = PyCodec_Encode(unicode, encoding, errors); - if (v == NULL) - goto onError; - return v; - - onError: - return NULL; -} - static size_t wcstombs_errorpos(const wchar_t *wstr) { @@ -3824,45 +3669,6 @@ PyUnicode_AsEncodedString(PyObject *unicode, return NULL; } -PyObject * -PyUnicode_AsEncodedUnicode(PyObject *unicode, - const char *encoding, - const char *errors) -{ - PyObject *v; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - goto onError; - } - - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PyUnicode_AsEncodedUnicode() is deprecated; " - "use PyCodec_Encode() to encode from str to str", 1) < 0) - return NULL; - - if (encoding == NULL) - encoding = PyUnicode_GetDefaultEncoding(); - - /* Encode via the codec registry */ - v = PyCodec_Encode(unicode, encoding, errors); - if (v == NULL) - goto onError; - if (!PyUnicode_Check(v)) { - PyErr_Format(PyExc_TypeError, - "'%.400s' encoder returned '%.400s' instead of 'str'; " - "use codecs.encode() to encode to arbitrary types", - encoding, - Py_TYPE(v)->tp_name); - Py_DECREF(v); - goto onError; - } - return v; - - onError: - return NULL; -} - static size_t mbstowcs_errorpos(const char *str, size_t len) { @@ -4335,21 +4141,6 @@ PyUnicode_AsUnicode(PyObject *unicode) return PyUnicode_AsUnicodeAndSize(unicode, NULL); } -const Py_UNICODE * -_PyUnicode_AsUnicode(PyObject *unicode) -{ - Py_ssize_t size; - const Py_UNICODE *wstr; - - wstr = PyUnicode_AsUnicodeAndSize(unicode, &size); - if (wstr && wcslen(wstr) != (size_t)size) { - PyErr_SetString(PyExc_ValueError, "embedded null character"); - return NULL; - } - return wstr; -} - - Py_ssize_t PyUnicode_GetSize(PyObject *unicode) { @@ -4358,7 +4149,6 @@ PyUnicode_GetSize(PyObject *unicode) goto onError; } return PyUnicode_GET_SIZE(unicode); - onError: return -1; } @@ -4397,29 +4187,6 @@ 
PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index) return PyUnicode_READ(kind, data, index); } -int -PyUnicode_WriteChar(PyObject *unicode, Py_ssize_t index, Py_UCS4 ch) -{ - if (!PyUnicode_Check(unicode) || !PyUnicode_IS_COMPACT(unicode)) { - PyErr_BadArgument(); - return -1; - } - assert(PyUnicode_IS_READY(unicode)); - if (index < 0 || index >= PyUnicode_GET_LENGTH(unicode)) { - PyErr_SetString(PyExc_IndexError, "string index out of range"); - return -1; - } - if (unicode_check_modifiable(unicode)) - return -1; - if (ch > PyUnicode_MAX_CHAR_VALUE(unicode)) { - PyErr_SetString(PyExc_ValueError, "character out of range"); - return -1; - } - PyUnicode_WRITE(PyUnicode_KIND(unicode), PyUnicode_DATA(unicode), - index, ch); - return 0; -} - const char * PyUnicode_GetDefaultEncoding(void) { @@ -5063,6 +4830,7 @@ encode_char: return NULL; return v; } + PyObject * PyUnicode_EncodeUTF7(const Py_UNICODE *s, Py_ssize_t size, @@ -6169,7 +5937,7 @@ _PyUnicode_DecodeUnicodeEscape(const char *s, assert(writer.pos < writer.size); switch (c) { - /* \x escapes */ + /* \x escapes */ case '\n': continue; case '\\': WRITE_ASCII_CHAR('\\'); continue; case '\'': WRITE_ASCII_CHAR('\''); continue; @@ -6184,8 +5952,10 @@ _PyUnicode_DecodeUnicodeEscape(const char *s, case 'v': WRITE_ASCII_CHAR('\013'); continue; /* BEL, not classic C */ case 'a': WRITE_ASCII_CHAR('\007'); continue; + /* [jart] ansi escape */ + case 'e': WRITE_ASCII_CHAR('\033'); continue; - /* \OOO (octal) escapes */ + /* \OOO (octal) escapes */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': ch = c - '0'; @@ -6464,7 +6234,6 @@ PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, if (tmp == NULL) { return NULL; } - result = PyUnicode_AsUnicodeEscapeString(tmp); Py_DECREF(tmp); return result; @@ -6899,7 +6668,7 @@ unicode_encode_call_errorhandler(const char *errors, return resunicode; } -static PyObject * +PyObject * unicode_encode_ucs1(PyObject *unicode, const char *errors, const Py_UCS4 
limit) @@ -7084,21 +6853,6 @@ unicode_encode_ucs1(PyObject *unicode, return NULL; } -/* Deprecated */ -PyObject * -PyUnicode_EncodeLatin1(const Py_UNICODE *p, - Py_ssize_t size, - const char *errors) -{ - PyObject *result; - PyObject *unicode = PyUnicode_FromUnicode(p, size); - if (unicode == NULL) - return NULL; - result = unicode_encode_ucs1(unicode, errors, 256); - Py_DECREF(unicode); - return result; -} - PyObject * _PyUnicode_AsLatin1String(PyObject *unicode, const char *errors) { @@ -7225,21 +6979,6 @@ PyUnicode_DecodeASCII(const char *s, return NULL; } -/* Deprecated */ -PyObject * -PyUnicode_EncodeASCII(const Py_UNICODE *p, - Py_ssize_t size, - const char *errors) -{ - PyObject *result; - PyObject *unicode = PyUnicode_FromUnicode(p, size); - if (unicode == NULL) - return NULL; - result = unicode_encode_ucs1(unicode, errors, 128); - Py_DECREF(unicode); - return result; -} - PyObject * _PyUnicode_AsASCIIString(PyObject *unicode, const char *errors) { @@ -8075,22 +7814,6 @@ _PyUnicode_EncodeCharmap(PyObject *unicode, return NULL; } -/* Deprecated */ -PyObject * -PyUnicode_EncodeCharmap(const Py_UNICODE *p, - Py_ssize_t size, - PyObject *mapping, - const char *errors) -{ - PyObject *result; - PyObject *unicode = PyUnicode_FromUnicode(p, size); - if (unicode == NULL) - return NULL; - result = _PyUnicode_EncodeCharmap(unicode, mapping, errors); - Py_DECREF(unicode); - return result; -} - PyObject * PyUnicode_AsCharmapString(PyObject *unicode, PyObject *mapping) @@ -8395,7 +8118,7 @@ exit: return res; } -static PyObject * +PyObject * _PyUnicode_TranslateCharmap(PyObject *input, PyObject *mapping, const char *errors) @@ -8412,29 +8135,23 @@ _PyUnicode_TranslateCharmap(PyObject *input, PyObject *exc = NULL; int ignore; int res; - if (mapping == NULL) { PyErr_BadArgument(); return NULL; } - if (PyUnicode_READY(input) == -1) return NULL; data = (char*)PyUnicode_DATA(input); kind = PyUnicode_KIND(input); size = PyUnicode_GET_LENGTH(input); - if (size == 0) return 
PyUnicode_FromObject(input); - /* allocate enough for a simple 1:1 translation without replacements, if we need more, we'll resize */ _PyUnicodeWriter_Init(&writer); if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) goto onError; - ignore = (errors != NULL && strcmp(errors, "ignore") == 0); - if (PyUnicode_READY(input) == -1) return NULL; if (PyUnicode_IS_ASCII(input)) { @@ -8449,7 +8166,6 @@ _PyUnicode_TranslateCharmap(PyObject *input, else { i = 0; } - while (i adjust input pointer */ ++i; continue; } - /* untranslatable character */ collstart = i; collend = i+1; - /* find all untranslatable characters */ while (collend < size) { PyObject *x; @@ -8486,7 +8198,6 @@ _PyUnicode_TranslateCharmap(PyObject *input, break; ++collend; } - if (ignore) { i = collend; } @@ -8507,7 +8218,6 @@ _PyUnicode_TranslateCharmap(PyObject *input, Py_XDECREF(exc); Py_XDECREF(errorHandler); return _PyUnicodeWriter_Finish(&writer); - onError: _PyUnicodeWriter_Dealloc(&writer); Py_XDECREF(exc); @@ -8515,22 +8225,6 @@ _PyUnicode_TranslateCharmap(PyObject *input, return NULL; } -/* Deprecated. Use PyUnicode_Translate instead. 
*/ -PyObject * -PyUnicode_TranslateCharmap(const Py_UNICODE *p, - Py_ssize_t size, - PyObject *mapping, - const char *errors) -{ - PyObject *result; - PyObject *unicode = PyUnicode_FromUnicode(p, size); - if (!unicode) - return NULL; - result = _PyUnicode_TranslateCharmap(unicode, mapping, errors); - Py_DECREF(unicode); - return result; -} - PyObject * PyUnicode_Translate(PyObject *str, PyObject *mapping, @@ -8946,7 +8640,6 @@ _PyUnicode_InsertThousandsGrouping( return count; } - Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, @@ -8957,21 +8650,17 @@ PyUnicode_Count(PyObject *str, int kind1, kind2; void *buf1 = NULL, *buf2 = NULL; Py_ssize_t len1, len2; - if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0) return -1; - kind1 = PyUnicode_KIND(str); kind2 = PyUnicode_KIND(substr); if (kind1 < kind2) return 0; - len1 = PyUnicode_GET_LENGTH(str); len2 = PyUnicode_GET_LENGTH(substr); ADJUST_INDICES(start, end, len1); if (end - start < len2) return 0; - buf1 = PyUnicode_DATA(str); buf2 = PyUnicode_DATA(substr); if (kind2 != kind1) { @@ -8979,7 +8668,6 @@ PyUnicode_Count(PyObject *str, if (!buf2) goto onError; } - switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(str) && PyUnicode_IS_ASCII(substr)) @@ -9008,10 +8696,8 @@ PyUnicode_Count(PyObject *str, default: assert(0); result = 0; } - if (kind2 != kind1) PyMem_Free(buf2); - return result; onError: if (kind2 != kind1 && buf2) @@ -14751,16 +14437,6 @@ PyUnicode_InternInPlace(PyObject **p) _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; } -void -PyUnicode_InternImmortal(PyObject **p) -{ - PyUnicode_InternInPlace(p); - if (PyUnicode_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { - _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL; - Py_INCREF(*p); - } -} - PyObject * PyUnicode_InternFromString(const char *cp) { @@ -14771,14 +14447,13 @@ PyUnicode_InternFromString(const char *cp) return s; } -void +relegated void _Py_ReleaseInternedUnicodeStrings(void) { PyObject *keys; 
PyObject *s; Py_ssize_t i, n; Py_ssize_t immortal_size = 0, mortal_size = 0; - if (interned == NULL || !PyDict_Check(interned)) return; keys = PyDict_Keys(interned); @@ -14786,12 +14461,10 @@ _Py_ReleaseInternedUnicodeStrings(void) PyErr_Clear(); return; } - /* Since _Py_ReleaseInternedUnicodeStrings() is intended to help a leak detector, interned unicode strings are not forcibly deallocated; rather, we give them their stolen references back, and then clear and DECREF the interned dict. */ - n = PyList_GET_SIZE(keys); fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n", n); @@ -14826,7 +14499,6 @@ _Py_ReleaseInternedUnicodeStrings(void) Py_CLEAR(interned); } - /********************* Unicode Iterator **************************/ typedef struct { @@ -14985,126 +14657,6 @@ unicode_iter(PyObject *seq) return (PyObject *)it; } - -size_t -Py_UNICODE_strlen(const Py_UNICODE *u) -{ - int res = 0; - while(*u++) - res++; - return res; -} - -Py_UNICODE* -Py_UNICODE_strcpy(Py_UNICODE *s1, const Py_UNICODE *s2) -{ - Py_UNICODE *u = s1; - while ((*u++ = *s2++)); - return s1; -} - -Py_UNICODE* -Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) -{ - Py_UNICODE *u = s1; - while ((*u++ = *s2++)) - if (n-- == 0) - break; - return s1; -} - -Py_UNICODE* -Py_UNICODE_strcat(Py_UNICODE *s1, const Py_UNICODE *s2) -{ - Py_UNICODE *u1 = s1; - u1 += Py_UNICODE_strlen(u1); - Py_UNICODE_strcpy(u1, s2); - return s1; -} - -int -Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2) -{ - while (*s1 && *s2 && *s1 == *s2) - s1++, s2++; - if (*s1 && *s2) - return (*s1 < *s2) ? -1 : +1; - if (*s1) - return 1; - if (*s2) - return -1; - return 0; -} - -int -Py_UNICODE_strncmp(const Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) -{ - Py_UNICODE u1, u2; - for (; n != 0; n--) { - u1 = *s1; - u2 = *s2; - if (u1 != u2) - return (u1 < u2) ? 
-1 : +1; - if (u1 == '\0') - return 0; - s1++; - s2++; - } - return 0; -} - -Py_UNICODE* -Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c) -{ - const Py_UNICODE *p; - for (p = s; *p; p++) - if (*p == c) - return (Py_UNICODE*)p; - return NULL; -} - -Py_UNICODE* -Py_UNICODE_strrchr(const Py_UNICODE *s, Py_UNICODE c) -{ - const Py_UNICODE *p; - p = s + Py_UNICODE_strlen(s); - while (p != s) { - p--; - if (*p == c) - return (Py_UNICODE*)p; - } - return NULL; -} - -Py_UNICODE* -PyUnicode_AsUnicodeCopy(PyObject *unicode) -{ - Py_UNICODE *u, *copy; - Py_ssize_t len, size; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } - u = PyUnicode_AsUnicodeAndSize(unicode, &len); - if (u == NULL) - return NULL; - /* Ensure we won't overflow the size. */ - if (len > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { - PyErr_NoMemory(); - return NULL; - } - size = len + 1; /* copy the null character */ - size *= sizeof(Py_UNICODE); - copy = PyMem_Malloc(size); - if (copy == NULL) { - PyErr_NoMemory(); - return NULL; - } - memcpy(copy, u, size); - return copy; -} - /* A _string module, to export formatter_parser and formatter_field_name_split to the string.Formatter class implemented in Python. 
*/ diff --git a/third_party/python/Parser/tokenizer.c b/third_party/python/Parser/tokenizer.c index 2a47ba4d2..5b83f8ad8 100644 --- a/third_party/python/Parser/tokenizer.c +++ b/third_party/python/Parser/tokenizer.c @@ -1702,7 +1702,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) } while (c == '_'); } else { - int nonzero = 0; + int nonoctal = 0; /* maybe old-style octal; c is first char of it */ /* in any case, allow '0' as a literal */ while (1) { @@ -1719,8 +1719,25 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) } c = tok_nextc(tok); } + /* [jart] restore octal */ + if ('1' <= c && c <= '7') { + while (1) { + if (c == '_') { + c = tok_nextc(tok); + if (!('0' <= c && c <= '7')) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + } + if (!('0' <= c && c <= '7')) { + break; + } + c = tok_nextc(tok); + } + } if (isdigit(c)) { - nonzero = 1; + nonoctal = 1; c = tok_decimal_tail(tok); if (c == 0) { return ERRORTOKEN; @@ -1736,8 +1753,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) else if (c == 'j' || c == 'J') { goto imaginary; } - else if (nonzero) { - /* Old-style octal: now disallowed. 
*/ + else if (nonoctal) { tok->done = E_TOKEN; tok_backup(tok, c); return ERRORTOKEN; diff --git a/third_party/python/Python/cosmomodule.c b/third_party/python/Python/cosmomodule.c index c2b297662..73b8ff439 100644 --- a/third_party/python/Python/cosmomodule.c +++ b/third_party/python/Python/cosmomodule.c @@ -35,6 +35,7 @@ #include "third_party/python/Include/moduleobject.h" #include "third_party/python/Include/pyerrors.h" #include "third_party/python/Include/pymacro.h" +#include "third_party/python/Include/pyport.h" #include "third_party/python/Include/yoink.h" #include "third_party/xed/x86.h" /* clang-format off */ @@ -129,7 +130,7 @@ static PyObject * cosmo_ftrace(PyObject *self, PyObject *noargs) { ftrace_install(); - return Py_None; + Py_RETURN_NONE; } PyDoc_STRVAR(crc32c_doc, diff --git a/third_party/python/Python/getargs.c b/third_party/python/Python/getargs.c index 7bb645c15..c95a8f51a 100644 --- a/third_party/python/Python/getargs.c +++ b/third_party/python/Python/getargs.c @@ -968,7 +968,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, *p = PyUnicode_AsUnicodeAndSize(arg, &len); if (*p == NULL) RETURN_ERR_OCCURRED; - if (Py_UNICODE_strlen(*p) != (size_t)len) { + if (wcslen(*p) != (size_t)len) { PyErr_SetString(PyExc_ValueError, "embedded null character"); RETURN_ERR_OCCURRED; } diff --git a/third_party/python/Python/modsupport.c b/third_party/python/Python/modsupport.c index 5dad7dfb1..a5fe849f4 100644 --- a/third_party/python/Python/modsupport.c +++ b/third_party/python/Python/modsupport.c @@ -301,7 +301,7 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags) } else { if (n < 0) - n = Py_UNICODE_strlen(u); + n = wcslen(u); v = PyUnicode_FromUnicode(u, n); } return v; diff --git a/third_party/python/Python/mystrtoul.c b/third_party/python/Python/mystrtoul.c index 66440642b..8ee8477a2 100644 --- a/third_party/python/Python/mystrtoul.c +++ b/third_party/python/Python/mystrtoul.c @@ -137,11 +137,15 @@ 
PyOS_strtoul(const char *str, char **ptr, int base) /* skip all zeroes... */ while (*str == '0') ++str; - while (Py_ISSPACE(Py_CHARMASK(*str))) - ++str; - if (ptr) - *ptr = (char *)str; - return 0; + if ('0' <= *str && *str <= '7') { + base = 8; /* [jart] restore octal */ + } else { + while (Py_ISSPACE(Py_CHARMASK(*str))) + ++str; + if (ptr) + *ptr = (char *)str; + return 0; + } } } else diff --git a/third_party/python/Python/recursive.c b/third_party/python/Python/recursive.c new file mode 100644 index 000000000..d7e92a6e7 --- /dev/null +++ b/third_party/python/Python/recursive.c @@ -0,0 +1,32 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "third_party/python/Include/ceval.h" +/* clang-format off */ + +int +(Py_EnterRecursiveCall)(const char *where) +{ + return Py_EnterRecursiveCall(where); +} + +void +(Py_LeaveRecursiveCall)(void) +{ + Py_LeaveRecursiveCall(); +} diff --git a/third_party/python/README.cosmo b/third_party/python/README.cosmo index 64b2a8047..8353cfe67 100644 --- a/third_party/python/README.cosmo +++ b/third_party/python/README.cosmo @@ -11,6 +11,8 @@ LICENSE LOCAL CHANGES + - Restore octal notation + - Support \e → 033 escapes - Undiamond #include lines - Support zipos file loading - Make Python binaries work on six operating systems diff --git a/third_party/python/chibicc.inc b/third_party/python/chibicc.inc new file mode 100644 index 000000000..ca84a41ba --- /dev/null +++ b/third_party/python/chibicc.inc @@ -0,0 +1,3 @@ +#define Py_LIMITED_API +#define PY_SSIZE_T_CLEAN +#include "third_party/python/Include/Python.h" diff --git a/third_party/python/pyobj.c b/third_party/python/pyobj.c index 8bdf6ca4b..91454f8ab 100644 --- a/third_party/python/pyobj.c +++ b/third_party/python/pyobj.c @@ -24,7 +24,6 @@ #include "libc/elf/def.h" #include "libc/fmt/conv.h" #include "libc/log/check.h" -#include "libc/log/libfatal.internal.h" #include "libc/log/log.h" #include "libc/macros.internal.h" #include "libc/mem/mem.h" diff --git a/third_party/python/python.mk b/third_party/python/python.mk index b944873a4..5ee6f9bc4 100644 --- a/third_party/python/python.mk +++ b/third_party/python/python.mk @@ -204,6 +204,7 @@ THIRD_PARTY_PYTHON_HDRS = \ third_party/python/pyconfig.h THIRD_PARTY_PYTHON_INCS = \ + third_party/python/chibicc.inc \ third_party/python/Objects/stringlib/localeutil.inc \ third_party/python/Objects/stringlib/unicodedefs.inc \ third_party/python/Objects/stringlib/replace.inc \ @@ -509,6 +510,7 @@ THIRD_PARTY_PYTHON_STAGE2_A_SRCS = \ third_party/python/repl.c \ third_party/python/launch.c \ 
third_party/python/Objects/fromfd.c \ + third_party/python/Objects/unicodeobject-deadcode.c \ third_party/python/Modules/_bisectmodule.c \ third_party/python/Modules/_bz2module.c \ third_party/python/Modules/_codecsmodule.c \ @@ -695,6 +697,7 @@ THIRD_PARTY_PYTHON_STAGE2_A_SRCS = \ third_party/python/Parser/metagrammar.c \ third_party/python/Parser/pgen.c \ third_party/python/Python/dynamic_annotations.c \ + third_party/python/Python/recursive.c \ third_party/python/Python/frozen.c \ third_party/python/Python/frozenmain.c \ third_party/python/Python/getopt.c \ @@ -4214,6 +4217,103 @@ o/$(MODE)/third_party/python/freeze.com.dbg: \ $(APE_NO_MODIFY_SELF) @$(APELINK) +.PRECIOUS: o/$(MODE)/third_party/python/chibicc.inc +o/$(MODE)/third_party/python/chibicc.inc: \ + third_party/python/chibicc.inc \ + libc/assert.h \ + libc/bits/likely.h \ + libc/calls/struct/stat.h \ + libc/calls/struct/timespec.h \ + libc/dce.h \ + libc/errno.h \ + libc/fmt/fmts.h \ + libc/fmt/pflink.h \ + libc/integral/c.inc \ + libc/integral/lp64arg.inc \ + libc/integral/normalize.inc \ + libc/limits.h \ + libc/math.h \ + libc/mem/mem.h \ + libc/nexgen32e/kcpuids.h \ + libc/runtime/runtime.h \ + libc/runtime/symbolic.h \ + libc/runtime/valist.h \ + libc/stdio/stdio.h \ + libc/str/str.h \ + libc/unicode/unicode.h \ + third_party/python/Include/Python.h \ + third_party/python/Include/abstract.h \ + third_party/python/Include/bltinmodule.h \ + third_party/python/Include/boolobject.h \ + third_party/python/Include/bytearrayobject.h \ + third_party/python/Include/bytesobject.h \ + third_party/python/Include/cellobject.h \ + third_party/python/Include/ceval.h \ + third_party/python/Include/classobject.h \ + third_party/python/Include/code.h \ + third_party/python/Include/codecs.h \ + third_party/python/Include/compile.h \ + third_party/python/Include/complexobject.h \ + third_party/python/Include/descrobject.h \ + third_party/python/Include/dictobject.h \ + third_party/python/Include/dtoa.h \ + 
third_party/python/Include/dynamic_annotations.h \ + third_party/python/Include/enumobject.h \ + third_party/python/Include/eval.h \ + third_party/python/Include/fileobject.h \ + third_party/python/Include/fileutils.h \ + third_party/python/Include/floatobject.h \ + third_party/python/Include/funcobject.h \ + third_party/python/Include/genobject.h \ + third_party/python/Include/import.h \ + third_party/python/Include/intrcheck.h \ + third_party/python/Include/iterobject.h \ + third_party/python/Include/listobject.h \ + third_party/python/Include/longintrepr.h \ + third_party/python/Include/longobject.h \ + third_party/python/Include/memoryobject.h \ + third_party/python/Include/methodobject.h \ + third_party/python/Include/modsupport.h \ + third_party/python/Include/moduleobject.h \ + third_party/python/Include/namespaceobject.h \ + third_party/python/Include/object.h \ + third_party/python/Include/objimpl.h \ + third_party/python/Include/odictobject.h \ + third_party/python/Include/op.h \ + third_party/python/Include/osmodule.h \ + third_party/python/Include/patchlevel.h \ + third_party/python/Include/pyarena.h \ + third_party/python/Include/pyatomic.h \ + third_party/python/Include/pycapsule.h \ + third_party/python/Include/pyctype.h \ + third_party/python/Include/pydebug.h \ + third_party/python/Include/pyerrors.h \ + third_party/python/Include/pyfpe.h \ + third_party/python/Include/pyhash.h \ + third_party/python/Include/pylifecycle.h \ + third_party/python/Include/pymacro.h \ + third_party/python/Include/pymath.h \ + third_party/python/Include/pymem.h \ + third_party/python/Include/pyport.h \ + third_party/python/Include/pystate.h \ + third_party/python/Include/pystrcmp.h \ + third_party/python/Include/pystrtod.h \ + third_party/python/Include/pythonrun.h \ + third_party/python/Include/pytime.h \ + third_party/python/Include/rangeobject.h \ + third_party/python/Include/setobject.h \ + third_party/python/Include/sliceobject.h \ + 
third_party/python/Include/structseq.h \ + third_party/python/Include/sysmodule.h \ + third_party/python/Include/traceback.h \ + third_party/python/Include/tupleobject.h \ + third_party/python/Include/typeslots.h \ + third_party/python/Include/unicodeobject.h \ + third_party/python/Include/warnings.h \ + third_party/python/Include/weakrefobject.h \ + third_party/python/pyconfig.h + @$(COMPILE) -ACHECK.h $(COMPILE.c) -xc -E -P -fdirectives-only -dD -D__chibicc__ -o $@ $< + ################################################################################ # HELLO.COM diff --git a/third_party/stb/stb_image.c b/third_party/stb/stb_image.c index b262e2bf7..b00d093cd 100644 --- a/third_party/stb/stb_image.c +++ b/third_party/stb/stb_image.c @@ -194,7 +194,7 @@ static int stbi__mad2sizes_valid(int a, int b, int add) { return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a * b, add); } -// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow static int stbi__mad3sizes_valid(int a, int b, int c, int add) { return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a * b, c) && stbi__addsizes_valid(a * b * c, add); diff --git a/tool/build/runit.h b/tool/build/runit.h index 0d363edc9..5a50b794a 100644 --- a/tool/build/runit.h +++ b/tool/build/runit.h @@ -1,9 +1,9 @@ #ifndef COSMOPOLITAN_TOOL_BUILD_RUNIT_H_ #define COSMOPOLITAN_TOOL_BUILD_RUNIT_H_ -#define RUNITD_PORT 31337 -#define RUNITD_MAGIC 0xFEEDABEEu -#define RUNITD_TIMEOUT_MS (1000 * 10) +#define RUNITD_PORT 31337 +#define RUNITD_MAGIC 0xFEEDABEEu +#define RUNITD_TIMEOUT_MS (1000 * 30) enum RunitCommand { kRunitExecute, diff --git a/tool/viz/printimage.c b/tool/viz/printimage.c index fd5caabcc..9849a88c4 100644 --- a/tool/viz/printimage.c +++ b/tool/viz/printimage.c @@ -390,6 +390,7 @@ void WithImageFile(const char *path, sxn = xn; dyn = g_flags.height; dxn = g_flags.width; +#if 1 while (HALF(syn) > dyn ||
HALF(sxn) > dxn) { if (HALF(sxn) > dxn) { Magikarp2xX(yn, xn, data, syn, sxn); @@ -404,6 +405,7 @@ void WithImageFile(const char *path, syn = HALF(syn); } } +#endif data = EzGyarados(3, dyn, dxn, gc(memalign(32, dyn * dxn * 3)), cn, yn, xn, data, 0, cn, dyn, dxn, syn, sxn, 0, 0, 0, 0); yn = dyn;