From 11ec99931b71602e8d306546e6ded223b855e9de Mon Sep 17 00:00:00 2001
From: Justine Tunney
Date: Sat, 6 Mar 2021 09:52:29 -0800
Subject: [PATCH] Add Musl multibyte functions

These are standard functions that are needed to help support the Skull
language. Note that normally this codebase uses libc/str/thompike.h

See #105
---
 libc/limits.h | 4 +-
 libc/str/btowc.c | 41 +++++
 libc/str/c16rtomb.c | 66 +++++++
 .../{nexgen32e/wmemset.inc => str/c32rtomb.c} | 24 +--
 libc/str/mb.c | 56 ++++++
 libc/str/mb.internal.h | 19 ++
 libc/{fmt/mbsrtowcs.c => str/mblen.c} | 25 +--
 libc/{fmt/mbrtowc.c => str/mbrlen.c} | 30 +---
 libc/str/mbrtoc16.c | 60 +++++++
 libc/str/mbrtoc32.c | 46 +++++
 libc/str/mbrtowc.c | 85 +++++++++
 libc/str/mbsinit.c | 23 +++
 libc/str/mbsnrtowcs.c | 94 ++++++++++
 libc/str/mbsrtowcs.c | 168 ++++++++++++++++++
 libc/{fmt => str}/mbstowcs.c | 0
 libc/str/mbtowc.c | 85 ++++++---
 libc/str/memset16.c | 20 ++-
 libc/str/str.h | 13 ++
 libc/str/str.mk | 1 +
 libc/str/wcrtomb.c | 68 +++++++
 libc/str/wcslen.c | 13 +-
 libc/str/wcsnrtombs.c | 68 +++++++
 libc/str/wcsrtombs.c | 90 ++++++++++
 libc/str/wcstombs.c | 23 +++
 libc/str/wctob.c | 50 ++++--
 libc/str/wctomb.c | 3 +-
 libc/str/wmempcpy.c | 2 +-
 libc/str/wmemset.c | 18 +-
 28 files changed, 1064 insertions(+), 131 deletions(-)
 create mode 100644 libc/str/btowc.c
 create mode 100644 libc/str/c16rtomb.c
 rename libc/{nexgen32e/wmemset.inc => str/c32rtomb.c} (80%)
 create mode 100644 libc/str/mb.c
 create mode 100644 libc/str/mb.internal.h
 rename libc/{fmt/mbsrtowcs.c => str/mblen.c} (76%)
 rename libc/{fmt/mbrtowc.c => str/mbrlen.c} (74%)
 create mode 100644 libc/str/mbrtoc16.c
 create mode 100644 libc/str/mbrtoc32.c
 create mode 100644 libc/str/mbrtowc.c
 create mode 100644 libc/str/mbsinit.c
 create mode 100644 libc/str/mbsnrtowcs.c
 create mode 100644 libc/str/mbsrtowcs.c
 rename libc/{fmt => str}/mbstowcs.c (100%)
 create mode 100644 libc/str/wcrtomb.c
 create mode 100644 libc/str/wcsnrtombs.c
 create mode 100644 libc/str/wcsrtombs.c
create mode 100644 libc/str/wcstombs.c diff --git a/libc/limits.h b/libc/limits.h index e895b6da0..6e0ce5d06 100644 --- a/libc/limits.h +++ b/libc/limits.h @@ -87,8 +87,8 @@ #define UINT64_MIN 0ull #define UINTPTR_MIN 0ull -#define MB_CUR_MAX 6 -#define MB_LEN_MAX 6 +#define MB_CUR_MAX 4 +#define MB_LEN_MAX 4 #if !(__ASSEMBLER__ + __LINKER__ + 0) diff --git a/libc/str/btowc.c b/libc/str/btowc.c new file mode 100644 index 000000000..526a65fe1 --- /dev/null +++ b/libc/str/btowc.c @@ -0,0 +1,41 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
│ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/calls.h" +#include "libc/limits.h" +#include "libc/str/mb.internal.h" +#include "libc/str/str.h" + +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. al.\""); +asm(".include \"libc/disclaimer.inc\""); + +wint_t btowc(int c) { + int b = (unsigned char)c; + return b < 128U ? b : (MB_CUR_MAX == 1 && c != EOF) ? CODEUNIT(c) : WEOF; +} diff --git a/libc/str/c16rtomb.c b/libc/str/c16rtomb.c new file mode 100644 index 000000000..15e90379e --- /dev/null +++ b/libc/str/c16rtomb.c @@ -0,0 +1,66 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
│ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/calls.h" +#include "libc/errno.h" +#include "libc/limits.h" +#include "libc/str/mb.internal.h" +#include "libc/str/str.h" + +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. al.\""); +asm(".include \"libc/disclaimer.inc\""); + +size_t c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps) { + static unsigned internal_state; + if (!ps) ps = (void *)&internal_state; + unsigned *x = (unsigned *)ps; + wchar_t wc; + if (!s) { + if (*x) goto ilseq; + return 1; + } + if (!*x && c16 - 0xd800u < 0x400) { + *x = c16 - 0xd7c0 << 10; + return 0; + } + if (*x) { + if (c16 - 0xdc00u >= 0x400) + goto ilseq; + else + wc = *x + c16 - 0xdc00; + *x = 0; + } else { + wc = c16; + } + return wcrtomb(s, wc, 0); +ilseq: + *x = 0; + errno = EILSEQ; + return -1; +} diff --git a/libc/nexgen32e/wmemset.inc b/libc/str/c32rtomb.c similarity index 80% rename from libc/nexgen32e/wmemset.inc rename to libc/str/c32rtomb.c index 5a1e5c74f..a874401a8 100644 --- a/libc/nexgen32e/wmemset.inc +++ b/libc/str/c32rtomb.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,24 +16,8 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE 
USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/str/str.h" -/** - * Sets wide memory. - * @asyncsignalsafe - */ -optimizespeed T *wmemset(T *dest, T c, size_t count) { - T v[N]; - size_t i, j; - for (i = 0; i < N; ++i) v[i] = c; - for (i = 0; i < count;) { - if (i + N <= count) { - for (j = 0; j < N; ++j) { - dest[i + j] = v[j]; - } - i += N; - } else { - dest[i++] = c; - } - } - return dest; +size_t c32rtomb(char *s, char32_t c, mbstate_t *t) { + return wcrtomb(s, c, t); } diff --git a/libc/str/mb.c b/libc/str/mb.c new file mode 100644 index 000000000..3aab9a84e --- /dev/null +++ b/libc/str/mb.c @@ -0,0 +1,56 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
│ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/str/mb.internal.h" + +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. al.\""); +asm(".include \"libc/disclaimer.inc\""); + +#define C(x) (x < 2 ? -1 : (R(0x80, 0xc0) | x)) +#define D(x) C((x + 16)) +#define E(x) \ + ((x == 0 ? R(0xa0, 0xc0) \ + : x == 0xd ? R(0x80, 0xa0) \ + : R(0x80, 0xc0)) | \ + (R(0x80, 0xc0) >> 6) | x) +#define F(x) \ + ((x >= 5 ? 0 \ + : x == 0 ? R(0x90, 0xc0) \ + : x == 4 ? R(0x80, 0x90) \ + : R(0x80, 0xc0)) | \ + (R(0x80, 0xc0) >> 6) | (R(0x80, 0xc0) >> 12) | x) + +const uint32_t kMbBittab[51 /* ?! */] = { + C(0x2), C(0x3), C(0x4), C(0x5), C(0x6), C(0x7), C(0x8), C(0x9), C(0xa), + C(0xb), C(0xc), C(0xd), C(0xe), C(0xf), D(0x0), D(0x1), D(0x2), D(0x3), + D(0x4), D(0x5), D(0x6), D(0x7), D(0x8), D(0x9), D(0xa), D(0xb), D(0xc), + D(0xd), D(0xe), D(0xf), E(0x0), E(0x1), E(0x2), E(0x3), E(0x4), E(0x5), + E(0x6), E(0x7), E(0x8), E(0x9), E(0xa), E(0xb), E(0xc), E(0xd), E(0xe), + E(0xf), F(0x0), F(0x1), F(0x2), F(0x3), F(0x4), +}; diff --git a/libc/str/mb.internal.h b/libc/str/mb.internal.h new file mode 100644 index 000000000..fb36af765 --- /dev/null +++ b/libc/str/mb.internal.h @@ -0,0 +1,19 @@ +#ifndef COSMOPOLITAN_LIBC_STR_MB_INTERNAL_H_ +#define COSMOPOLITAN_LIBC_STR_MB_INTERNAL_H_ +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +#define SA 0xc2u +#define SB 0xf4u +#define CODEUNIT(c) (0xdfff & (signed char)(c)) +#define IS_CODEUNIT(c) ((unsigned)(c)-0xdf80 < 0x80) +#define R(a, b) ((uint32_t)((a == 0x80 ? 
0x40u - b : 0u - a) << 23)) +#define FAILSTATE R(0x80, 0x80) +#define OOB(c, b) \ + (((((b) >> 3) - 0x10) | (((b) >> 3) + ((int32_t)(c) >> 26))) & ~7) + +extern const uint32_t kMbBittab[51]; + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_LIBC_STR_MB_INTERNAL_H_ */ diff --git a/libc/fmt/mbsrtowcs.c b/libc/str/mblen.c similarity index 76% rename from libc/fmt/mbsrtowcs.c rename to libc/str/mblen.c index 20b011a75..1b1f6eeb3 100644 --- a/libc/fmt/mbsrtowcs.c +++ b/libc/str/mblen.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,27 +16,8 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/fmt/fmt.h" -#include "libc/limits.h" #include "libc/str/str.h" -size_t mbsrtowcs(wchar_t *dest, const char **src, size_t len, mbstate_t *ps) { - /* TODO(jart): Need to fix. 
*/ - const unsigned char *s = (const void *)*src; - size_t wn0 = len; - unsigned c = 0; - if (!dest) return strlen((const char *)s); - for (;;) { - if (!len) { - *src = (const void *)s; - return wn0; - } - if (!*s) break; - c = *s++; - *dest++ = c; - len--; - } - *dest = 0; - *src = 0; - return wn0 - len; +int mblen(const char *s, size_t n) { + return mbtowc(0, s, n); } diff --git a/libc/fmt/mbrtowc.c b/libc/str/mbrlen.c similarity index 74% rename from libc/fmt/mbrtowc.c rename to libc/str/mbrlen.c index d5159a263..18966e3d2 100644 --- a/libc/fmt/mbrtowc.c +++ b/libc/str/mbrlen.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,30 +16,10 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/runtime/runtime.h" #include "libc/str/str.h" -#include "libc/sysv/errfuns.h" -static mbstate_t g_mbrtowc; - -size_t mbrtowc(wchar_t *pwc, const char *src, size_t n, mbstate_t *ps) { - /* TODO(jart): Need to fix. 
*/ - wchar_t t; - mbstate_t c; - const unsigned char *p; - if (!ps) ps = &g_mbrtowc; - p = (const unsigned char *)src; - c = *ps; - if (!p && c) return eilseq(); - if (!p) return 0; - if (!pwc) pwc = &t; - if (n) { - if (!c) { - if (0 <= *p && *p < 0x80) { - return !!(*pwc = *p); - } - abort(); /* TODO(jart) */ - } - } - return -2; +size_t mbrlen(const char *s, size_t n, mbstate_t *t) { + static mbstate_t ss; + if (!t) t = &ss; + return mbrtowc(0, s, n, t); } diff --git a/libc/str/mbrtoc16.c b/libc/str/mbrtoc16.c new file mode 100644 index 000000000..5bb38125e --- /dev/null +++ b/libc/str/mbrtoc16.c @@ -0,0 +1,60 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
│ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/calls.h" +#include "libc/limits.h" +#include "libc/str/mb.internal.h" +#include "libc/str/str.h" + +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. al.\""); +asm(".include \"libc/disclaimer.inc\""); + +size_t mbrtoc16(char16_t *pc16, const char *s, size_t n, mbstate_t *ps) { + static unsigned internal_state; + if (!ps) ps = (void *)&internal_state; + unsigned *pending = (unsigned *)ps; + if (!s) return mbrtoc16(0, "", 1, ps); + /* mbrtowc states for partial UTF-8 characters have the high bit set; + * we use nonzero states without high bit for pending surrogates. */ + if ((int)*pending > 0) { + if (pc16) *pc16 = *pending; + *pending = 0; + return -3; + } + wchar_t wc; + size_t ret = mbrtowc(&wc, s, n, ps); + if (ret <= 4) { + if (wc >= 0x10000) { + *pending = (wc & 0x3ff) + 0xdc00; + wc = 0xd7c0 + (wc >> 10); + } + if (pc16) *pc16 = wc; + } + return ret; +} diff --git a/libc/str/mbrtoc32.c b/libc/str/mbrtoc32.c new file mode 100644 index 000000000..eb1fdb5cc --- /dev/null +++ b/libc/str/mbrtoc32.c @@ -0,0 +1,46 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. 
│ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/errno.h" +#include "libc/limits.h" +#include "libc/macros.internal.h" +#include "libc/str/str.h" + +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. 
al.\""); +asm(".include \"libc/disclaimer.inc\""); + +size_t mbrtoc32(char32_t *pc32, const char *s, size_t n, mbstate_t *ps) { + static unsigned internal_state; + if (!ps) ps = (void *)&internal_state; + if (!s) return mbrtoc32(0, "", 1, ps); + wchar_t wc; + size_t ret = mbrtowc(&wc, s, n, ps); + if (ret <= 4 && pc32) *pc32 = wc; + return ret; +} diff --git a/libc/str/mbrtowc.c b/libc/str/mbrtowc.c new file mode 100644 index 000000000..2a760a1dc --- /dev/null +++ b/libc/str/mbrtowc.c @@ -0,0 +1,85 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
│ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/errno.h" +#include "libc/limits.h" +#include "libc/macros.internal.h" +#include "libc/str/mb.internal.h" +#include "libc/str/str.h" + +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. al.\""); +asm(".include \"libc/disclaimer.inc\""); + +size_t mbrtowc(wchar_t *wc, const char *src, size_t n, mbstate_t *st) { + static unsigned internal_state; + long wut; + unsigned c; + const unsigned char *s = (const void *)src; + const unsigned N = n; + wchar_t dummy; + if (!st) st = (void *)&internal_state; + c = *(unsigned *)st; + if (!s) { + if (c) goto ilseq; + return 0; + } else if (!wc) { + wc = &dummy; + } + if (!n) return -2; + if (!c) { + if (*s < 0x80) return !!(*wc = *s); + if (MB_CUR_MAX == 1) return (*wc = CODEUNIT(*s)), 1; + if (*s - SA > SB - SA) goto ilseq; + wut = *s++ - SA; + wut = MAX(0, MIN(ARRAYLEN(kMbBittab) - 1, wut)); + c = kMbBittab[wut]; + n--; + } + if (n) { + if (OOB(c, *s)) goto ilseq; + loop: + c = c << 6 | *s++ - 0x80; + n--; + if (!(c & (1U << 31))) { + *(unsigned *)st = 0; + *wc = c; + return N - n; + } + if (n) { + if (*s - 0x80u >= 0x40) goto ilseq; + goto loop; + } + } + *(unsigned *)st = c; + return -2; +ilseq: + *(unsigned *)st = 0; + errno = EILSEQ; + return -1; +} diff --git a/libc/str/mbsinit.c b/libc/str/mbsinit.c new file mode 100644 index 000000000..e8cd6ae8b --- /dev/null +++ b/libc/str/mbsinit.c @@ -0,0 +1,23 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all 
copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/str/str.h" + +int mbsinit(const mbstate_t *t) { + return !t || !*t; +} diff --git a/libc/str/mbsnrtowcs.c b/libc/str/mbsnrtowcs.c new file mode 100644 index 000000000..a2e61a1da --- /dev/null +++ b/libc/str/mbsnrtowcs.c @@ -0,0 +1,94 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
│ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/errno.h" +#include "libc/limits.h" +#include "libc/macros.internal.h" +#include "libc/str/mb.internal.h" +#include "libc/str/str.h" + +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. al.\""); +asm(".include \"libc/disclaimer.inc\""); + +size_t mbsnrtowcs(wchar_t *wcs, const char **src, size_t n, size_t wn, + mbstate_t *st) { + size_t l, cnt = 0, n2; + wchar_t *ws, wbuf[256]; + const char *s = *src; + const char *tmp_s; + if (!wcs) { + ws = wbuf, wn = sizeof(wbuf) / sizeof(*wbuf); + } else { + ws = wcs; + } + /* making sure output buffer size is at most n/4 will ensure + * that mbsrtowcs never reads more than n input bytes. thus + * we can use mbsrtowcs as long as it's practical.. */ + while (s && wn && ((n2 = n / 4) >= wn || n2 > 32)) { + if (n2 >= wn) n2 = wn; + tmp_s = s; + l = mbsrtowcs(ws, &s, n2, st); + if (!(l + 1)) { + cnt = l; + wn = 0; + break; + } + if (ws != wbuf) { + ws += l; + wn -= l; + } + n = s ? 
n - (s - tmp_s) : 0; + cnt += l; + } + if (s) + while (wn && n) { + l = mbrtowc(ws, s, n, st); + if (l + 2 <= 2) { + if (!(l + 1)) { + cnt = l; + break; + } + if (!l) { + s = 0; + break; + } + /* have to roll back partial character */ + *(unsigned *)st = 0; + break; + } + s += l; + n -= l; + /* safe - this loop runs fewer than sizeof(wbuf)/8 times */ + ws++; + wn--; + cnt++; + } + if (wcs) *src = s; + return cnt; +} diff --git a/libc/str/mbsrtowcs.c b/libc/str/mbsrtowcs.c new file mode 100644 index 000000000..ac9d67ede --- /dev/null +++ b/libc/str/mbsrtowcs.c @@ -0,0 +1,168 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
│ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/errno.h" +#include "libc/limits.h" +#include "libc/macros.internal.h" +#include "libc/str/mb.internal.h" +#include "libc/str/str.h" + +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. al.\""); +asm(".include \"libc/disclaimer.inc\""); + +size_t mbsrtowcs(wchar_t *ws, const char **src, size_t wn, mbstate_t *st) { + const unsigned char *s = (const void *)*src; + size_t wn0 = wn; + unsigned c = 0; + if (st && (c = *(unsigned *)st)) { + if (ws) { + *(unsigned *)st = 0; + goto resume; + } else { + goto resume0; + } + } + if (MB_CUR_MAX == 1) { + if (!ws) return strlen((const char *)s); + for (;;) { + if (!wn) { + *src = (const void *)s; + return wn0; + } + if (!*s) break; + c = *s++; + *ws++ = CODEUNIT(c); + wn--; + } + *ws = 0; + *src = 0; + return wn0 - wn; + } + if (!ws) + for (;;) { +#ifdef __GNUC__ + typedef uint32_t __attribute__((__may_alias__)) w32; + if (*s - 1u < 0x7f && (uintptr_t)s % 4 == 0) { + while (!((*(w32 *)s | *(w32 *)s - 0x01010101) & 0x80808080)) { + s += 4; + wn -= 4; + } + } +#endif + if (*s - 1u < 0x7f) { + s++; + wn--; + continue; + } + if (*s - SA > SB - SA) break; + c = kMbBittab[*s++ - SA]; + resume0: + if (OOB(c, *s)) { + s--; + break; + } + s++; + if (c & (1U << 25)) { + if (*s - 0x80u >= 0x40) { + s -= 2; + break; + } + s++; + if (c & (1U << 19)) { + if (*s - 0x80u >= 0x40) { + s -= 3; + break; + } + s++; + } + } + wn--; + c = 0; + } + else + for (;;) { + if (!wn) { + *src = (const void *)s; + return wn0; + } +#ifdef __GNUC__ + typedef uint32_t __attribute__((__may_alias__)) w32; + if (*s - 1u < 0x7f && (uintptr_t)s % 4 == 0) { + while (wn >= 5 && + !((*(w32 *)s | *(w32 *)s - 0x01010101) & 0x80808080)) { + *ws++ = *s++; + *ws++ = *s++; + *ws++ = *s++; + *ws++ = *s++; + wn -= 4; + } + } +#endif + if (*s - 1u < 0x7f) { + *ws++ = *s++; + wn--; + continue; + } + if (*s - SA > SB - SA) break; + c = 
kMbBittab[*s++ - SA]; + resume: + if (OOB(c, *s)) { + s--; + break; + } + c = (c << 6) | *s++ - 0x80; + if (c & (1U << 31)) { + if (*s - 0x80u >= 0x40) { + s -= 2; + break; + } + c = (c << 6) | *s++ - 0x80; + if (c & (1U << 31)) { + if (*s - 0x80u >= 0x40) { + s -= 3; + break; + } + c = (c << 6) | *s++ - 0x80; + } + } + *ws++ = c; + wn--; + c = 0; + } + if (!c && !*s) { + if (ws) { + *ws = 0; + *src = 0; + } + return wn0 - wn; + } + errno = EILSEQ; + if (ws) *src = (const void *)s; + return -1; +} diff --git a/libc/fmt/mbstowcs.c b/libc/str/mbstowcs.c similarity index 100% rename from libc/fmt/mbstowcs.c rename to libc/str/mbstowcs.c diff --git a/libc/str/mbtowc.c b/libc/str/mbtowc.c index 7c923f4f7..ecd39af9a 100644 --- a/libc/str/mbtowc.c +++ b/libc/str/mbtowc.c @@ -1,36 +1,71 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. 
│ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/errno.h" #include "libc/limits.h" -#include "libc/macros.internal.h" +#include "libc/str/mb.internal.h" #include "libc/str/str.h" -#include "libc/str/tpdecode.internal.h" -compatfn int mbtowc(wchar_t *wc, const char *s, size_t n) { +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. al.\""); +asm(".include \"libc/disclaimer.inc\""); +/** Decodes one multibyte character from at most n bytes of src into *wc (a local dummy receives the value when wc is NULL). Returns 0 if src is NULL or the character is NUL, otherwise the number of bytes consumed (1 to 4), or -1 with errno set to EILSEQ when n is 0, n is too small, or the bytes are rejected. */ +int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n) { + unsigned c; + const unsigned char *s = (const void *)src; + wchar_t dummy; if (!s) return 0; - _Alignas(8) char alt[ROUNDUP(MB_CUR_MAX, 8)]; - if (n < MB_CUR_MAX) { - memset(alt, 0, sizeof(alt)); - memcpy(alt, s, n); - s = &alt[0]; + if (!n) goto ilseq; /* zero available bytes is reported as an error */ + if (!wc) wc = &dummy; /* lets every store below run unconditionally */ + if (*s < 0x80) return !!(*wc = *s); /* single byte: yields 0 for NUL and 1 otherwise */ + if (MB_CUR_MAX == 1) return (*wc = CODEUNIT(*s)), 1; + if (*s - SA > SB - SA) goto ilseq; /* NOTE(review): SA and SB come from mb.internal.h; presumably the accepted lead byte range - confirm */ + c = kMbBittab[*s++ - SA]; + /* Avoid excessive checks against n: If shifting the state n-1 + * times does not clear the high bit, then the value of n is + * insufficient to read a character */ + if (n < 4 && ((c << (6 * n - 6)) & (1U << 31))) goto ilseq; + if (OOB(c, *s)) goto ilseq; + c = c << 6 | *s++ - 0x80; + if (!(c & (1U << 31))) { /* high bit clear: the character is complete after two bytes */ + *wc = c; + return 2; } - wint_t wi; - int rc = tpdecode(s, &wi); - if (wc) *wc = (wchar_t)wi; - return rc; + if (*s - 0x80u >= 0x40) goto ilseq; /* next byte is outside 0x80..0xbf */ + c = c << 6 | *s++ - 0x80; + if (!(c & (1U << 31))) { + *wc = c; + return 3; + } + if (*s - 0x80u >= 0x40) goto ilseq; + *wc = c << 6 | *s++ - 0x80; + return 4; +ilseq: + errno = EILSEQ; + return -1; } diff --git a/libc/str/memset16.c b/libc/str/memset16.c index 6f7ddaae6..5ce88d354 100644 --- a/libc/str/memset16.c +++ b/libc/str/memset16.c @@ -16,12 +16,16 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE.
│ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/bigword.internal.h" #include "libc/str/str.h" -#define wmemset memset16 -#define T unsigned short -#define N (BIGWORD / sizeof(T)) -#include "libc/nexgen32e/wmemset.inc" -#undef wmemset -#undef T -#undef N + +/** + * Stores c into each of the n char16_t elements starting at p and returns p. + * @asyncsignalsafe + */ +char16_t *memset16(char16_t *p, char16_t c, size_t n) { + size_t i; + for (i = 0; i < n; ++i) { + p[i] = c; + } + return p; +} diff --git a/libc/str/str.h b/libc/str/str.h index 134394141..21f584d5c 100644 --- a/libc/str/str.h +++ b/libc/str/str.h @@ -217,8 +217,21 @@ int mbtowc(wchar_t *, const char *, size_t); size_t mbrtowc(wchar_t *, const char *, size_t, mbstate_t *); size_t mbsrtowcs(wchar_t *, const char **, size_t, mbstate_t *); size_t mbstowcs(wchar_t *, const char *, size_t); +size_t wcrtomb(char *, wchar_t, mbstate_t *); +size_t c32rtomb(char *, char32_t, mbstate_t *); +size_t mbrtoc32(char32_t *, const char *, size_t, mbstate_t *); +size_t c16rtomb(char *, char16_t, mbstate_t *); +size_t mbrtoc16(char16_t *, const char *, size_t, mbstate_t *); +size_t mbrlen(const char *, size_t, mbstate_t *); +size_t mbsnrtowcs(wchar_t *, const char **, size_t, size_t, mbstate_t *); +size_t wcsnrtombs(char *, const wchar_t **, size_t, size_t, mbstate_t *); +size_t wcsrtombs(char *, const wchar_t **, size_t, mbstate_t *); +size_t wcstombs(char *, const wchar_t *, size_t); +int mbsinit(const mbstate_t *); +int mblen(const char *, size_t); int wctomb(char *, wchar_t); int wctob(wint_t); +wint_t btowc(int); size_t strclen(const char *) nosideeffect; size_t strnclen(const char *, size_t) nosideeffect; diff --git a/libc/str/str.mk b/libc/str/str.mk index d634a3a80..9be83eac2 100644 --- a/libc/str/str.mk +++ b/libc/str/str.mk @@ -30,6 +30,7 @@ LIBC_STR_A_CHECKS = \ LIBC_STR_A_DIRECTDEPS = \ LIBC_INTRIN \ LIBC_STUBS \ + LIBC_SYSV \ LIBC_NEXGEN32E LIBC_STR_A_DEPS := \ diff --git a/libc/str/wcrtomb.c
b/libc/str/wcrtomb.c new file mode 100644 index 000000000..eaf0e042e --- /dev/null +++ b/libc/str/wcrtomb.c @@ -0,0 +1,68 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/errno.h" +#include "libc/limits.h" +#include "libc/str/mb.internal.h" +#include "libc/str/str.h" + +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. 
al.\""); +asm(".include \"libc/disclaimer.inc\""); +/** Encodes wc into s as a 1 to 4 byte sequence (lead bytes 0xc0, 0xe0 or 0xf0 followed by 0x80 continuation bytes) and returns the number of bytes stored. Returns 1 without storing anything if s is NULL, or (size_t)-1 with errno set to EILSEQ when wc lies in 0xd800..0xdfff or is 0x110000 or above. st is not used. */ +size_t wcrtomb(char *s, wchar_t wc, mbstate_t *st) { + if (!s) return 1; /* NULL buffer: nothing is stored */ + if ((unsigned)wc < 0x80) { + *s = wc; + return 1; + } else if (MB_CUR_MAX == 1) { /* NOTE(review): IS_CODEUNIT is defined in mb.internal.h, not visible here - confirm which values it accepts */ + if (!IS_CODEUNIT(wc)) { + errno = EILSEQ; + return -1; + } + *s = wc; + return 1; + } else if ((unsigned)wc < 0x800) { + *s++ = 0xc0 | (wc >> 6); + *s = 0x80 | (wc & 0x3f); + return 2; + } else if ((unsigned)wc < 0xd800 || (unsigned)wc - 0xe000 < 0x2000) { /* 0x800..0xffff minus the 0xd800..0xdfff gap */ + *s++ = 0xe0 | (wc >> 12); + *s++ = 0x80 | ((wc >> 6) & 0x3f); + *s = 0x80 | (wc & 0x3f); + return 3; + } else if ((unsigned)wc - 0x10000 < 0x100000) { /* 0x10000..0x10ffff */ + *s++ = 0xf0 | (wc >> 18); + *s++ = 0x80 | ((wc >> 12) & 0x3f); + *s++ = 0x80 | ((wc >> 6) & 0x3f); + *s = 0x80 | (wc & 0x3f); + return 4; + } + errno = EILSEQ; + return -1; +} diff --git a/libc/str/wcslen.c b/libc/str/wcslen.c index 9be3adc44..962024704 100644 --- a/libc/str/wcslen.c +++ b/libc/str/wcslen.c @@ -28,6 +28,15 @@ * @see memmem() */ wchar_t *wcsstr(const wchar_t *haystack, const wchar_t *needle) { - return memmem(haystack, wcslen(haystack) * sizeof(wchar_t), needle, - wcslen(needle) * sizeof(wchar_t)); + size_t i; + for (;;) { /* one pass per candidate start position in haystack */ + for (i = 0;;) { + if (!needle[i]) return (/*unconst*/ wchar_t *)haystack; /* end of needle reached: match at the current position, also the result for an empty needle */ + if (!haystack[i]) break; + if (needle[i] != haystack[i]) break; + ++i; + } + if (!*haystack++) break; /* stop once the terminator has been tried as a start, otherwise retry one element later */ + } + return NULL; } diff --git a/libc/str/wcsnrtombs.c b/libc/str/wcsnrtombs.c new file mode 100644 index 000000000..d950c6097 --- /dev/null +++ b/libc/str/wcsnrtombs.c @@ -0,0 +1,68 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al.
│ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/errno.h" +#include "libc/limits.h" +#include "libc/str/mb.internal.h" +#include "libc/str/str.h" + +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. al.\""); +asm(".include \"libc/disclaimer.inc\""); +/** Converts at most wn wide characters from *wcs into at most n bytes at dst using wcrtomb. Returns the number of bytes produced, not counting a terminating NUL, or (size_t)-1 if wcrtomb fails. When dst is NULL nothing is stored and *wcs is left untouched; otherwise *wcs becomes NULL once the terminator has been converted, or else points at the next unconverted element. st is not used. */ +size_t wcsnrtombs(char *dst, const wchar_t **wcs, size_t wn, size_t n, + mbstate_t *st) { + const wchar_t *ws = *wcs; + size_t cnt = 0; + if (!dst) n = 0; /* forces the tmp path below, so a NULL dst is never written */ + while (ws && wn) { + char tmp[MB_LEN_MAX]; + size_t l = wcrtomb(n < MB_LEN_MAX ?
tmp : dst, *ws, 0); + if (l == -1) { + cnt = -1; + break; + } + if (dst) { + if (n < MB_LEN_MAX) { + if (l > n) break; /* encoded character does not fit in the remaining space */ + memcpy(dst, tmp, l); + } + dst += l; + n -= l; + } + if (!*ws) { /* terminator converted: signal completion with NULL and leave it out of cnt */ + ws = 0; + break; + } + ws++; + wn--; + cnt += l; + } + if (dst) *wcs = ws; + return cnt; +} diff --git a/libc/str/wcsrtombs.c b/libc/str/wcsrtombs.c new file mode 100644 index 000000000..804e3bf28 --- /dev/null +++ b/libc/str/wcsrtombs.c @@ -0,0 +1,90 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╚──────────────────────────────────────────────────────────────────────────────╝ +│ │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al. │ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
│ +│ │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/errno.h" +#include "libc/limits.h" +#include "libc/str/mb.internal.h" +#include "libc/str/str.h" + +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. al.\""); +asm(".include \"libc/disclaimer.inc\""); +/** Converts the wide string *ws into at most n bytes at s via wcrtomb. With s NULL it only returns the byte length needed, excluding the terminator, and leaves *ws alone. Otherwise it returns the number of bytes stored: when the terminator is reached a NUL byte is stored, *ws is set to NULL and that byte is not counted; when space runs out first, *ws points at the next unconverted element. Returns (size_t)-1 if wcrtomb fails. st is not used. */ +size_t wcsrtombs(char *s, const wchar_t **ws, size_t n, mbstate_t *st) { + const wchar_t *ws2; + char buf[4]; + size_t N = n, l; + if (!s) { /* length-only mode: encode into buf purely to measure */ + for (n = 0, ws2 = *ws; *ws2; ws2++) { + if (*ws2 >= 0x80u) { + l = wcrtomb(buf, *ws2, 0); + if (!(l + 1)) return -1; /* l + 1 wraps to 0 exactly when l is (size_t)-1 */ + n += l; + } else + n++; + } + return n; + } + while (n >= 4) { /* at least 4 bytes left, so wcrtomb may write straight into s */ + if (**ws - 1u >= 0x7fu) { /* true for 0 and for anything at or above 0x80 */ + if (!**ws) { + *s = 0; + *ws = 0; + return N - n; + } + l = wcrtomb(s, **ws, 0); + if (!(l + 1)) return -1; + s += l; + n -= l; + } else { + *s++ = **ws; + n--; + } + (*ws)++; + } + while (n) { /* fewer than 4 bytes left: measure in buf before committing to s */ + if (**ws - 1u >= 0x7fu) { + if (!**ws) { + *s = 0; + *ws = 0; + return N - n; + } + l = wcrtomb(buf, **ws, 0); + if (!(l + 1)) return -1; + if (l > n) return N - n; /* next character does not fit; stop short without storing it */ + wcrtomb(s, **ws, 0); + s += l; + n -= l; + } else { + *s++ = **ws; + n--; + } + (*ws)++; + } + return N; +} diff --git a/libc/str/wcstombs.c b/libc/str/wcstombs.c new file mode 100644 index 000000000..5cad3ef74 --- /dev/null +++ b/libc/str/wcstombs.c @@ -0,0 +1,23 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies.
│ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/str/str.h" +/** Converts the wide string ws into at most n bytes at s by calling wcsrtombs with a null state argument and returns its result. A temporary copy of the pointer is passed, so the pointer held by the caller is never modified. */ +size_t wcstombs(char *s, const wchar_t *ws, size_t n) { + return wcsrtombs(s, &(const wchar_t *){ws}, n, 0); /* compound literal gives wcsrtombs a scratch pointer to update */ +} diff --git a/libc/str/wctob.c b/libc/str/wctob.c index 7fc8d3f4a..0f9c5524b 100644 --- a/libc/str/wctob.c +++ b/libc/str/wctob.c @@ -1,28 +1,42 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +╚──────────────────────────────────────────────────────────────────────────────╝ │ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ +│ Musl Libc │ +│ Copyright © 2005-2014 Rich Felker, et al.
│ +│ │ +│ Permission is hereby granted, free of charge, to any person obtaining │ +│ a copy of this software and associated documentation files (the │ +│ "Software"), to deal in the Software without restriction, including │ +│ without limitation the rights to use, copy, modify, merge, publish, │ +│ distribute, sublicense, and/or sell copies of the Software, and to │ +│ permit persons to whom the Software is furnished to do so, subject to │ +│ the following conditions: │ +│ │ +│ The above copyright notice and this permission notice shall be │ +│ included in all copies or substantial portions of the Software. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │ +│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │ +│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │ +│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │ +│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │ +│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │ +│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │ │ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/calls.h" +#include "libc/limits.h" +#include "libc/str/mb.internal.h" #include "libc/str/str.h" +asm(".ident\t\"\\n\\n\ +Musl libc (MIT License)\\n\ +Copyright 2005-2014 Rich Felker, et. 
al.\""); +asm(".include \"libc/disclaimer.inc\""); +/** Returns c when it is below 128, (unsigned char)c when MB_CUR_MAX is 1 and IS_CODEUNIT(c) holds, and EOF otherwise. */ int wctob(wint_t c) { - if (0 <= c && c <= 127) { - return c; - } else { - return EOF; - } + if (c < 128U) return c; /* unsigned compare against 128U */ + if (MB_CUR_MAX == 1 && IS_CODEUNIT(c)) return (unsigned char)c; /* NOTE(review): IS_CODEUNIT comes from mb.internal.h, not visible here - confirm its range */ + return EOF; } diff --git a/libc/str/wctomb.c b/libc/str/wctomb.c index 39776d0aa..02b55da6a 100644 --- a/libc/str/wctomb.c +++ b/libc/str/wctomb.c @@ -18,9 +18,8 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/limits.h" #include "libc/str/str.h" -#include "libc/str/tpencode.internal.h" int wctomb(char *s, wchar_t wc) { if (!s) return 0; - return tpencode(s, MB_CUR_MAX, wc, false); + return wcrtomb(s, wc, 0); /* encoding is delegated to wcrtomb with a null state argument */ } diff --git a/libc/str/wmempcpy.c b/libc/str/wmempcpy.c index b8124f1b8..d72a24300 100644 --- a/libc/str/wmempcpy.c +++ b/libc/str/wmempcpy.c @@ -18,6 +18,6 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/str/str.h" -compatfn wchar_t *wmempcpy(wchar_t *dest, const wchar_t *src, size_t count) { +wchar_t *wmempcpy(wchar_t *dest, const wchar_t *src, size_t count) { /* forwards to mempcpy with count scaled from elements to bytes */ return mempcpy(dest, src, count * sizeof(wchar_t)); } diff --git a/libc/str/wmemset.c b/libc/str/wmemset.c index d5cb5cc60..a79ecff2f 100644 --- a/libc/str/wmemset.c +++ b/libc/str/wmemset.c @@ -16,10 +16,16 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/bigword.internal.h" #include "libc/str/str.h" -#define T wchar_t -#define N (BIGWORD / sizeof(T)) -#include "libc/nexgen32e/wmemset.inc" -#undef T -#undef N + +/** + * Stores c into each of the n wchar_t elements starting at p and returns p. + * @asyncsignalsafe + */ +wchar_t *wmemset(wchar_t *p, wchar_t c, size_t n) { + size_t i; + for (i = 0; i < n; ++i) { + p[i] = c; + } + return p; +}