From 1f2288be6ea0f77d1fc6dd960ca0c1601ba64324 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Sun, 2 May 2021 07:48:59 -0700 Subject: [PATCH] Improve backwards compatibility with GNU Make --- Makefile | 22 +-- README.md | 1 + libc/stdio/getdelim.c | 6 +- libc/str/strchr.c | 10 +- libc/str/strstr.c | 3 + libc/str/tprecode16to8.c | 3 + libc/str/tprecode8to16.c | 3 + libc/x/utf16toutf8.c | 91 +++++++++++++ libc/x/utf8toutf16.c | 86 ++++++++++++ libc/x/x.h | 2 + libc/x/xgetline.c | 18 +-- .../stdio/{getline_test.c => getdelim_test.c} | 0 test/libc/str/strrchr_test.c | 125 ++++++++++++++---- test/libc/str/strstr_test.c | 10 ++ .../strrchr_test.inc => x/utf16toutf8_test.c} | 46 +++---- test/libc/x/utf8toutf16_test.c | 40 ++++++ third_party/chibicc/as.c | 36 ++--- tool/build/mkdeps.c | 6 +- 18 files changed, 412 insertions(+), 96 deletions(-) create mode 100644 libc/x/utf16toutf8.c create mode 100644 libc/x/utf8toutf16.c rename test/libc/stdio/{getline_test.c => getdelim_test.c} (100%) rename test/libc/{str/strrchr_test.inc => x/utf16toutf8_test.c} (55%) create mode 100644 test/libc/x/utf8toutf16_test.c diff --git a/Makefile b/Makefile index c87d438fa..68ed09649 100644 --- a/Makefile +++ b/Makefile @@ -189,13 +189,13 @@ include examples/package/build.mk #-φ-examples/package/new.sh include test/test.mk -OBJS = $(foreach x,$(PKGS),$($(x)_OBJS)) -SRCS = $(foreach x,$(PKGS),$($(x)_SRCS)) -HDRS = $(foreach x,$(PKGS),$($(x)_HDRS)) -INCS = $(foreach x,$(PKGS),$($(x)_INCS)) -BINS = $(foreach x,$(PKGS),$($(x)_BINS)) -TESTS = $(foreach x,$(PKGS),$($(x)_TESTS)) -CHECKS = $(foreach x,$(PKGS),$($(x)_CHECKS)) +OBJS = $(foreach x,$(PKGS),$($(x)_OBJS)) +SRCS := $(foreach x,$(PKGS),$($(x)_SRCS)) +HDRS := $(foreach x,$(PKGS),$($(x)_HDRS)) +INCS = $(foreach x,$(PKGS),$($(x)_INCS)) +BINS = $(foreach x,$(PKGS),$($(x)_BINS)) +TESTS = $(foreach x,$(PKGS),$($(x)_TESTS)) +CHECKS = $(foreach x,$(PKGS),$($(x)_CHECKS)) bins: $(BINS) check: $(CHECKS) @@ -206,11 +206,17 @@ tags: TAGS HTAGS 
o/$(MODE)/.x:
 	@mkdir -p $(@D) && touch $@
 
+ifeq ($(filter 3.%,$(MAKE_VERSION)),)  # file function needs GNU Make 4.0+; 3.x uses the else branch
 o/$(MODE)/srcs.txt: o/$(MODE)/.x $(MAKEFILES) $(call uniq,$(foreach x,$(SRCS),$(dir $(x))))
 	$(file >$@) $(foreach x,$(SRCS),$(file >>$@,$(x)))
-
 o/$(MODE)/hdrs.txt: o/$(MODE)/.x $(MAKEFILES) $(call uniq,$(foreach x,$(HDRS) $(INCS),$(dir $(x))))
 	$(file >$@) $(foreach x,$(HDRS) $(INCS),$(file >>$@,$(x)))
+else
+o/$(MODE)/srcs.txt: o/$(MODE)/.x $(MAKEFILES) $(call uniq,$(foreach x,$(SRCS),$(dir $(x))))
+	$(MAKE) MODE=rel -j8 -pn bopit 2>/dev/null | sed -ne '/^SRCS/ {s/.*:= //;s/ */\n/g;p;q}' >$@
+o/$(MODE)/hdrs.txt: o/$(MODE)/.x $(MAKEFILES) $(call uniq,$(foreach x,$(HDRS) $(INCS),$(dir $(x))))
+	$(MAKE) MODE=rel -j8 -pn bopit 2>/dev/null | sed -ne '/^HDRS/ {s/.*:= //;s/ */\n/g;p;q}' >$@
+endif
 
 o/$(MODE)/depend: o/$(MODE)/.x o/$(MODE)/srcs.txt o/$(MODE)/hdrs.txt $(SRCS) $(HDRS) $(INCS)
 	@$(COMPILE) -AMKDEPS $(MKDEPS) -o $@ -r o/$(MODE)/ o/$(MODE)/srcs.txt o/$(MODE)/hdrs.txt
diff --git a/README.md b/README.md
index b082a5af9..09c149fd2 100644
--- a/README.md
+++ b/README.md
@@ -73,3 +73,4 @@ find o -name \*.com | xargs ls -rShal | less
 | FreeBSD | 12 | 2018 |
 | OpenBSD | 6.4 | 2018 |
 | NetBSD | 9.1 | 2020 |
+| GNU Make | 3.80 | 2010 |
diff --git a/libc/stdio/getdelim.c b/libc/stdio/getdelim.c
index dfdadf27e..4667ecc13 100644
--- a/libc/stdio/getdelim.c
+++ b/libc/stdio/getdelim.c
@@ -33,10 +33,10 @@
  *     allocated automatically, also NUL-terminated is guaranteed
  * @param n is the capacity of s (in/out)
  * @param delim is the stop char (and NUL is implicitly too)
- * @return number of bytes read, including delim, excluding NUL, or -1
- *     w/ errno on EOF or error; see ferror() and feof()
+ * @return number of bytes read >0, including delim, excluding NUL,
+ *     or -1 w/ errno on EOF or error; see ferror() and feof()
  * @note this function can't punt EINTR to caller
- * @see getline(), gettok_r()
+ * @see getline(), chomp(), gettok_r()
  */
 ssize_t getdelim(char **s, size_t *n, int 
delim, FILE *f) { char *p; diff --git a/libc/str/strchr.c b/libc/str/strchr.c index c74aac9fb..885a4ad8e 100644 --- a/libc/str/strchr.c +++ b/libc/str/strchr.c @@ -17,16 +17,14 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" +#include "libc/bits/bits.h" #include "libc/str/str.h" -noasan static const unsigned char *strchr_x64(const unsigned char *p, - uint64_t c) { +noasan static const char *strchr_x64(const char *p, uint64_t c) { unsigned a, b; uint64_t w, x, y; for (c *= 0x0101010101010101;; p += 8) { - w = (uint64_t)p[7] << 070 | (uint64_t)p[6] << 060 | (uint64_t)p[5] << 050 | - (uint64_t)p[4] << 040 | (uint64_t)p[3] << 030 | (uint64_t)p[2] << 020 | - (uint64_t)p[1] << 010 | (uint64_t)p[0] << 000; + w = READ64LE(p); if ((x = ~(w ^ c) & ((w ^ c) - 0x0101010101010101) & 0x8080808080808080) | (y = ~w & (w - 0x0101010101010101) & 0x8080808080808080)) { if (x) { @@ -63,7 +61,7 @@ char *strchr(const char *s, int c) { if ((*s & 0xff) == c) return s; if (!*s) return NULL; } - r = (char *)strchr_x64((const unsigned char *)s, c); + r = strchr_x64(s, c); assert(!r || *r || !c); return r; } diff --git a/libc/str/strstr.c b/libc/str/strstr.c index 70129c56a..af79548c3 100644 --- a/libc/str/strstr.c +++ b/libc/str/strstr.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│
 ╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/bits/safemacros.internal.h"
 #include "libc/str/str.h"
 
 /**
@@ -29,6 +30,8 @@
  */
 char *strstr(const char *haystack, const char *needle) {
   size_t i;
+  if (!*needle) return (/*unconst*/ char *)haystack; /* "" matches at start */
+  if (!(haystack = strchr(haystack, *needle))) return NULL; /* no 1st byte */
   for (;;) {
     for (i = 0;;) {
       if (!needle[i]) return (/*unconst*/ char *)haystack;
diff --git a/libc/str/tprecode16to8.c b/libc/str/tprecode16to8.c
index 652dafd60..844127384 100644
--- a/libc/str/tprecode16to8.c
+++ b/libc/str/tprecode16to8.c
@@ -49,6 +49,9 @@ static noasan axdx_t tprecode16to8_sse2(char *dst, size_t dstsize,
 /**
  * Transcodes UTF-16 to UTF-8.
  *
+ * This is a low-level function intended for the core runtime. Use
+ * utf16toutf8() for a much better API that uses malloc().
+ *
  * @param dst is output buffer
  * @param dstsize is bytes in dst
  * @param src is NUL-terminated UTF-16 input string
diff --git a/libc/str/tprecode8to16.c b/libc/str/tprecode8to16.c
index f06e7d104..e93be6dfd 100644
--- a/libc/str/tprecode8to16.c
+++ b/libc/str/tprecode8to16.c
@@ -46,6 +46,9 @@ static inline noasan axdx_t tprecode8to16_sse2(char16_t *dst, size_t dstsize,
 /**
  * Transcodes UTF-8 to UTF-16.
  *
+ * This is a low-level function intended for the core runtime. Use
+ * utf8toutf16() for a much better API that uses malloc().
+ * * @param dst is output buffer * @param dstsize is shorts in dst * @param src is NUL-terminated UTF-8 input string diff --git a/libc/x/utf16toutf8.c b/libc/x/utf16toutf8.c new file mode 100644 index 000000000..8924f905a --- /dev/null +++ b/libc/x/utf16toutf8.c @@ -0,0 +1,91 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/bits/bits.h" +#include "libc/intrin/packsswb.h" +#include "libc/intrin/pandn.h" +#include "libc/intrin/pcmpgtb.h" +#include "libc/intrin/pcmpgtw.h" +#include "libc/intrin/pmovmskb.h" +#include "libc/intrin/punpckhbw.h" +#include "libc/intrin/punpcklbw.h" +#include "libc/mem/mem.h" +#include "libc/nexgen32e/bsr.h" +#include "libc/str/str.h" +#include "libc/str/thompike.h" +#include "libc/str/tpenc.h" +#include "libc/str/utf16.h" +#include "libc/x/x.h" + +static const int16_t kDel16[8] = {127, 127, 127, 127, 127, 127, 127, 127}; + +/** + * Transcodes UTF-16 to UTF-8. 
+ * + * @param p is input value + * @param n if -1 implies strlen + * @param z if non-NULL receives output length + */ +char *utf16toutf8(const char16_t *p, size_t n, size_t *z) { + char *r, *q; + wint_t x, y; + unsigned m, j, w; + const char16_t *e; + int16_t v1[8], v2[8], v3[8], vz[8]; + if (z) *z = 0; + if (n == -1) n = p ? strlen16(p) : 0; + if ((q = r = malloc(n * 4 + 8 + 1))) { + for (e = p + n; p < e;) { + if (p + 8 < e) { /* 17x ascii */ + memset(vz, 0, 16); + do { + memcpy(v1, p, 16); + pcmpgtw(v2, v1, vz); + pcmpgtw(v3, v1, kDel16); + pandn((void *)v2, (void *)v3, (void *)v2); + if (pmovmskb((void *)v2) != 0xFFFF) break; + packsswb((void *)v1, v1, v1); + memcpy(q, v1, 8); + p += 8; + q += 8; + } while (p + 8 < e); + } + x = *p++ & 0xffff; + if (!IsUcs2(x)) { + if (p < e) { + y = *p++ & 0xffff; + x = MergeUtf16(x, y); + } else { + x = 0xFFFD; + } + } + if (x < 0200) { + *q++ = x; + } else { + w = tpenc(x); + WRITE64LE(q, w); + q += bsr(w) >> 3; + q += 1; + } + } + if (z) *z = q - r; + *q++ = '\0'; + if ((q = realloc(r, (q - r) * 1))) r = q; + } + return r; +} diff --git a/libc/x/utf8toutf16.c b/libc/x/utf8toutf16.c new file mode 100644 index 000000000..6ae24d652 --- /dev/null +++ b/libc/x/utf8toutf16.c @@ -0,0 +1,86 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/intrin/pcmpgtb.h" +#include "libc/intrin/pmovmskb.h" +#include "libc/intrin/punpckhbw.h" +#include "libc/intrin/punpcklbw.h" +#include "libc/mem/mem.h" +#include "libc/str/str.h" +#include "libc/str/thompike.h" +#include "libc/str/utf16.h" +#include "libc/x/x.h" + +/** + * Transcodes UTF-8 to UTF-16. + * + * @param p is input value + * @param n if -1 implies strlen + * @param z if non-NULL receives output length + */ +char16_t *utf8toutf16(const char *p, size_t n, size_t *z) { + size_t i; + wint_t x, a, b; + char16_t *r, *q; + unsigned m, j, w; + uint8_t v1[16], v2[16], vz[16]; + if (z) *z = 0; + if (n == -1) n = p ? 
strlen(p) : 0; + if ((q = r = malloc(n * sizeof(char16_t) * 2 + sizeof(char16_t)))) { + for (i = 0; i < n;) { + if (i + 16 < n) { /* 34x ascii */ + memset(vz, 0, 16); + do { + memcpy(v1, p + i, 16); + pcmpgtb((int8_t *)v2, (int8_t *)v1, (int8_t *)vz); + if (pmovmskb(v2) != 0xFFFF) break; + punpcklbw(v2, v1, vz); + punpckhbw(v1, v1, vz); + memcpy(q + 0, v2, 16); + memcpy(q + 8, v1, 16); + i += 16; + q += 16; + } while (i + 16 < n); + } + x = p[i++] & 0xff; + if (x >= 0300) { + a = ThomPikeByte(x); + m = ThomPikeLen(x) - 1; + if (i + m <= n) { + for (j = 0;;) { + b = p[i + j] & 0xff; + if (!ThomPikeCont(b)) break; + a = ThomPikeMerge(a, b); + if (++j == m) { + x = a; + i += j; + break; + } + } + } + } + w = EncodeUtf16(x); + *q++ = w; + if ((w >>= 16)) *q++ = w; + } + if (z) *z = q - r; + *q++ = '\0'; + if ((q = realloc(r, (q - r) * sizeof(char16_t)))) r = q; + } + return r; +} diff --git a/libc/x/x.h b/libc/x/x.h index dd359878f..5bab222f5 100644 --- a/libc/x/x.h +++ b/libc/x/x.h @@ -51,6 +51,8 @@ char *xstrmul(const char *, size_t) paramsnonnull((1)) _XMAL; char *xinet_ntop(int, const void *) _XPNN _XMAL; void *xunbinga(size_t, const char16_t *) attributeallocalign((1)) _XMAL _XRET; void *xunbing(const char16_t *) _XMAL _XRET; +char16_t *utf8toutf16(const char *, size_t, size_t *) nodiscard; +char *utf16toutf8(const char16_t *, size_t, size_t *) nodiscard; /*───────────────────────────────────────────────────────────────────────────│─╗ │ cosmopolitan § eXtended apis » files ─╬─│┼ diff --git a/libc/x/xgetline.c b/libc/x/xgetline.c index 6bf4f2ba8..56123ffef 100644 --- a/libc/x/xgetline.c +++ b/libc/x/xgetline.c @@ -25,16 +25,18 @@ * * @return allocated line that needs free() and usually chomp() too, * or NULL on ferror() or feof() - * @see getline() for a more difficult api + * @see getdelim() for a more difficult api + * @see chomp() */ char *xgetline(FILE *f) { - char *res; - size_t n, got; + char *p; + size_t n; + ssize_t m; n = 0; - res = NULL; - if ((got = 
getdelim(&res, &n, '\n', f)) <= 0) { - free(res); - res = NULL; + p = 0; + if ((m = getdelim(&p, &n, '\n', f)) <= 0) { + free(p); + p = 0; } - return res; + return p; } diff --git a/test/libc/stdio/getline_test.c b/test/libc/stdio/getdelim_test.c similarity index 100% rename from test/libc/stdio/getline_test.c rename to test/libc/stdio/getdelim_test.c diff --git a/test/libc/str/strrchr_test.c b/test/libc/str/strrchr_test.c index cf99fc6c9..a068bcbf2 100644 --- a/test/libc/str/strrchr_test.c +++ b/test/libc/str/strrchr_test.c @@ -19,34 +19,101 @@ #include "libc/str/str.h" #include "libc/testlib/testlib.h" -#define T(NAME) NAME -#define S(S) S -#define C(C) C -#include "test/libc/str/strrchr_test.inc" -#undef C -#undef S -#undef T +TEST(strrchr, test) { + EXPECT_EQ(NULL, strrchr("hello", 'z')); + EXPECT_STREQ("lo", strrchr("hello", 'l')); + EXPECT_STREQ("llo", strchr("hello", 'l')); + EXPECT_STREQ("hello", strrchr("hello", 'h')); + EXPECT_STREQ("ello", strrchr("hello", 'e')); + EXPECT_STREQ("o", strrchr("hello", 'o')); +} -#define T(NAME) NAME##16 -#define S(S) u##S -#define C(C) u##C -#define strrchr(x, y) strrchr16(x, y) -#define strchr(x, y) strchr16(x, y) -#include "test/libc/str/strrchr_test.inc" -#undef strchr -#undef strrchr -#undef C -#undef S -#undef T +TEST(strrchr, simdVectorStuffIsntBroken) { + EXPECT_EQ(NULL, strrchr("--------------------------------", 'x')); + EXPECT_STREQ("x", strrchr("-------------------------------x", 'x')); + EXPECT_STREQ("x-------------------------------", + strrchr("x-------------------------------", 'x')); + EXPECT_STREQ("x" + "z-------------------------------", + strrchr("x" + "z-------------------------------", + 'x')); + EXPECT_STREQ("x-------------------------------" + "y-------------------------------", + strrchr("x-------------------------------" + "y-------------------------------", + 'x')); + EXPECT_STREQ("x" + "z-------------------------------" + "y-------------------------------", + strrchr("x" + 
"z-------------------------------" + "y-------------------------------", + 'x')); +} -#define T(NAME) NAME##32 -#define S(S) L##S -#define C(C) L##C -#define strchr(x, y) wcschr(x, y) -#define strrchr(x, y) wcsrchr(x, y) -#include "test/libc/str/strrchr_test.inc" -#undef strchr -#undef strrchr -#undef C -#undef S -#undef T +TEST(strrchr16, test) { + EXPECT_EQ(NULL, strrchr16(u"hello", 'z')); + EXPECT_STREQ(u"lo", strrchr16(u"hello", 'l')); + EXPECT_STREQ(u"llo", strchr16(u"hello", 'l')); + EXPECT_STREQ(u"hello", strrchr16(u"hello", 'h')); + EXPECT_STREQ(u"ello", strrchr16(u"hello", 'e')); + EXPECT_STREQ(u"o", strrchr16(u"hello", 'o')); +} + +TEST(strrchr16, simdVectorStuffIsntBroken) { + EXPECT_EQ(NULL, strrchr16(u"--------------------------------", 'x')); + EXPECT_STREQ(u"x", strrchr16(u"-------------------------------x", 'x')); + EXPECT_STREQ(u"x-------------------------------", + strrchr16(u"x-------------------------------", 'x')); + EXPECT_STREQ(u"x" + u"z-------------------------------", + strrchr16(u"x" + u"z-------------------------------", + 'x')); + EXPECT_STREQ(u"x-------------------------------" + u"y-------------------------------", + strrchr16(u"x-------------------------------" + u"y-------------------------------", + 'x')); + EXPECT_STREQ(u"x" + u"z-------------------------------" + u"y-------------------------------", + strrchr16(u"x" + u"z-------------------------------" + u"y-------------------------------", + 'x')); +} + +TEST(wcsrchr, test) { + EXPECT_EQ(NULL, wcsrchr(L"hello", 'z')); + EXPECT_STREQ(L"lo", wcsrchr(L"hello", 'l')); + EXPECT_STREQ(L"llo", wcschr(L"hello", 'l')); + EXPECT_STREQ(L"hello", wcsrchr(L"hello", 'h')); + EXPECT_STREQ(L"ello", wcsrchr(L"hello", 'e')); + EXPECT_STREQ(L"o", wcsrchr(L"hello", 'o')); +} + +TEST(wcsrchr, simdVectorStuffIsntBroken) { + EXPECT_EQ(NULL, wcsrchr(L"--------------------------------", 'x')); + EXPECT_STREQ(L"x", wcsrchr(L"-------------------------------x", 'x')); + 
EXPECT_STREQ(L"x-------------------------------", + wcsrchr(L"x-------------------------------", 'x')); + EXPECT_STREQ(L"x" + L"z-------------------------------", + wcsrchr(L"x" + L"z-------------------------------", + 'x')); + EXPECT_STREQ(L"x-------------------------------" + L"y-------------------------------", + wcsrchr(L"x-------------------------------" + L"y-------------------------------", + 'x')); + EXPECT_STREQ(L"x" + L"z-------------------------------" + L"y-------------------------------", + wcsrchr(L"x" + L"z-------------------------------" + L"y-------------------------------", + 'x')); +} diff --git a/test/libc/str/strstr_test.c b/test/libc/str/strstr_test.c index c80872c78..273e54e93 100644 --- a/test/libc/str/strstr_test.c +++ b/test/libc/str/strstr_test.c @@ -23,6 +23,8 @@ #include "libc/nexgen32e/x86feature.h" #include "libc/runtime/gc.internal.h" #include "libc/str/internal.h" +#include "libc/testlib/ezbench.h" +#include "libc/testlib/hyperion.h" #include "libc/testlib/testlib.h" #define MAKESTRING(NAME, VALUE) \ @@ -75,3 +77,11 @@ TEST(strstr, test) { ASSERT_EQ(NULL, strstr("-Wl,--gc-sections", "sanitize")); ASSERT_STREQ("x", strstr("x", "x")); } + +BENCH(strstr, bench) { + EZBENCH2("strstr", donothing, EXPROPRIATE(strstr(kHyperion, "THE END"))); + EZBENCH2("strstr", donothing, + EXPROPRIATE(strstr( + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", + "aaaaaab"))); +} diff --git a/test/libc/str/strrchr_test.inc b/test/libc/x/utf16toutf8_test.c similarity index 55% rename from test/libc/str/strrchr_test.inc rename to test/libc/x/utf16toutf8_test.c index 8f5773a24..476105b82 100644 --- a/test/libc/str/strrchr_test.inc +++ b/test/libc/x/utf16toutf8_test.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ 
+│ Copyright 2021 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,31 +16,27 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/mem/mem.h" +#include "libc/testlib/ezbench.h" +#include "libc/testlib/hyperion.h" +#include "libc/testlib/testlib.h" +#include "libc/x/x.h" -TEST(T(strrchr), test) { - EXPECT_EQ(NULL, strrchr(S("hello"), C('z'))); - EXPECT_STREQ(S("lo"), strrchr(S("hello"), C('l'))); - EXPECT_STREQ(S("llo"), strchr(S("hello"), C('l'))); - EXPECT_STREQ(S("hello"), strrchr(S("hello"), C('h'))); - EXPECT_STREQ(S("ello"), strrchr(S("hello"), C('e'))); - EXPECT_STREQ(S("o"), strrchr(S("hello"), C('o'))); +TEST(utf16toutf8, test) { + EXPECT_STREQ("hello☻♥", gc(utf16toutf8(u"hello☻♥", -1, 0))); + EXPECT_STREQ("hello☻♥hello☻♥h", gc(utf16toutf8(u"hello☻♥hello☻♥h", -1, 0))); + EXPECT_STREQ("hello☻♥hello☻♥hi", gc(utf16toutf8(u"hello☻♥hello☻♥hi", -1, 0))); + EXPECT_STREQ("hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥", + gc(utf16toutf8(u"hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥", -1, 0))); + EXPECT_STREQ("hello--hello--h", gc(utf16toutf8(u"hello--hello--h", -1, 0))); + EXPECT_STREQ("hello--hello--hi", gc(utf16toutf8(u"hello--hello--hi", -1, 0))); + EXPECT_STREQ("hello--hello--hello--hello--hello--", + gc(utf16toutf8(u"hello--hello--hello--hello--hello--", -1, 0))); } -TEST(T(strrchr), simdVectorStuffIsntBroken) { - EXPECT_EQ(NULL, strrchr(S("--------------------------------"), C('x'))); - EXPECT_STREQ(S("x"), strrchr(S("-------------------------------x"), C('x'))); - EXPECT_STREQ(S("x-------------------------------"), - strrchr(S("x-------------------------------"), C('x'))); - EXPECT_STREQ(S("x") S("z-------------------------------"), - strrchr(S("x") S("z-------------------------------"), 
C('x'))); - EXPECT_STREQ(S("x-------------------------------") - S("y-------------------------------"), - strrchr(S("x-------------------------------") - S("y-------------------------------"), - C('x'))); - EXPECT_STREQ(S("x") S("z-------------------------------") - S("y-------------------------------"), - strrchr(S("x") S("z-------------------------------") - S("y-------------------------------"), - C('x'))); +BENCH(utf16toutf8, bench) { + size_t n; + char16_t *h; + h = utf8toutf16(kHyperion, kHyperionSize, &n); + EZBENCH2("utf16toutf8", donothing, free(utf16toutf8(h, n, 0))); } diff --git a/test/libc/x/utf8toutf16_test.c b/test/libc/x/utf8toutf16_test.c new file mode 100644 index 000000000..d78391416 --- /dev/null +++ b/test/libc/x/utf8toutf16_test.c @@ -0,0 +1,40 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/mem/mem.h"
+#include "libc/testlib/ezbench.h"
+#include "libc/testlib/hyperion.h"
+#include "libc/testlib/testlib.h"
+#include "libc/x/x.h"
+
+TEST(utf8toutf16, test) {
+  EXPECT_STREQ(u"hello☻♥", gc(utf8toutf16("hello☻♥", -1, 0)));
+  EXPECT_STREQ(u"hello☻♥hello☻♥h", gc(utf8toutf16("hello☻♥hello☻♥h", -1, 0)));
+  EXPECT_STREQ(u"hello☻♥hello☻♥hi", gc(utf8toutf16("hello☻♥hello☻♥hi", -1, 0)));
+  EXPECT_STREQ(u"hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥",
+               gc(utf8toutf16("hello☻♥hello☻♥hello☻♥hello☻♥hello☻♥", -1, 0)));
+  EXPECT_STREQ(u"hello--hello--h", gc(utf8toutf16("hello--hello--h", -1, 0)));
+  EXPECT_STREQ(u"hello--hello--hi", gc(utf8toutf16("hello--hello--hi", -1, 0)));
+  EXPECT_STREQ(u"hello--hello--hello--hello--hello--",
+               gc(utf8toutf16("hello--hello--hello--hello--hello--", -1, 0)));
+}
+
+BENCH(utf8toutf16, bench) {
+  EZBENCH2("utf8toutf16", donothing,
+           free(utf8toutf16(kHyperion, kHyperionSize, 0)));
+}
diff --git a/third_party/chibicc/as.c b/third_party/chibicc/as.c
index 88c430b49..a8b8d272c 100644
--- a/third_party/chibicc/as.c
+++ b/third_party/chibicc/as.c
@@ -136,7 +136,12 @@
 #define ISRIP 0x00080000
 #define ISREG 0x00100000
 
-#define APPEND(L) L.p = realloc(L.p, ++L.n * sizeof(*L.p))
+#define APPEND(L) /* bump n; regrow p only once n exceeds capacity c */ \
+  do if (++L.n > L.c) {                                                 \
+    L.c = L.n + 2 + (L.c >> 1);                                         \
+    L.p = realloc(L.p, L.c * sizeof(*L.p));                             \
+  } while (0) /* a single statement, so it is safe before an `else` */
+
 #define IS(P, N, S) (N == sizeof(S) - 1 && !strncasecmp(P, S, sizeof(S) - 1))
 #define MAX(X, Y) ((Y) < (X) ? 
(X) : (Y)) #define READ128BE(S) ((unsigned __int128)READ64BE(S) << 64 | READ64BE((S) + 8)) @@ -152,29 +157,29 @@ struct As { bool inhibiterr; bool inhibitwarn; struct Ints { - unsigned long n; + unsigned long n, c; long *p; } ints; struct Floats { - unsigned long n; + unsigned long n, c; long double *p; } floats; struct Slices { - unsigned long n; + unsigned long n, c; struct Slice { - unsigned long n; + unsigned long n, c; char *p; } * p; } slices; struct Sauces { - unsigned long n; + unsigned long n, c; struct Sauce { unsigned path; // strings unsigned line; // 1-indexed } * p; } sauces; struct Things { - unsigned long n; + unsigned long n, c; struct Thing { enum ThingType { TT_INT, @@ -189,7 +194,7 @@ struct As { } * p; } things; struct Sections { - unsigned long n; + unsigned long n, c; struct Section { unsigned name; // strings int flags; @@ -199,7 +204,7 @@ struct As { } * p; } sections; struct Symbols { - unsigned long n; + unsigned long n, c; struct Symbol { bool isused; unsigned char stb; // STB_* @@ -220,7 +225,7 @@ struct As { } * p; } symbolindex; struct Labels { - unsigned long n; + unsigned long n, c; struct Label { unsigned id; unsigned tok; // things @@ -228,7 +233,7 @@ struct As { } * p; } labels; struct Relas { - unsigned long n; + unsigned long n, c; struct Rela { bool isdead; int kind; // R_X86_64_{16,32,64,PC8,PC32,PLT32,GOTPCRELX,...} @@ -239,7 +244,7 @@ struct As { } * p; } relas; struct Exprs { - unsigned long n; + unsigned long n, c; struct Expr { enum ExprKind { EX_INT, // integer @@ -277,11 +282,11 @@ struct As { } * p; } exprs; struct Strings { - unsigned long n; + unsigned long n, c; char **p; } strings, incpaths; struct SectionStack { - unsigned long n; + unsigned long n, c; int *p; } sectionstack; }; @@ -805,8 +810,7 @@ static void Tokenize(struct As *a, int path) { continue; } if (c == '"') { - buf.n = 0; - buf.p = NULL; + memset(&buf, 0, sizeof(buf)); for (i = 1; (c = p[i++]);) { if (c == '"') break; c = ReadCharLiteral(&buf, c, p, 
&i); diff --git a/tool/build/mkdeps.c b/tool/build/mkdeps.c index 733c9e660..b91da3889 100644 --- a/tool/build/mkdeps.c +++ b/tool/build/mkdeps.c @@ -239,7 +239,11 @@ void LoadRelationships(int argc, char *argv[]) { buf += PAGESIZE; buf[-1] = '\n'; for (i = optind; i < argc; ++i) { - CHECK_NOTNULL((finpaths = fopen(argv[i], "r"))); + if (!(finpaths = fopen(argv[i], "r"))) { + fprintf(stderr, "\n\e[1mERROR: %s FAILED BECAUSE %s CAUSED %m\e[0m\n\n", + argv[0], argv[i]); + exit(1); + } while (getline(&line, &linecap, finpaths) != -1) { src = chomp(line); if (ShouldSkipSource(src)) continue;