Do some string library work

This commit is contained in:
Justine Tunney 2022-08-20 21:36:07 -07:00
parent 83d41e4588
commit 35203c0551
42 changed files with 1381 additions and 136 deletions

View file

@ -21,12 +21,13 @@
/**
* Copies NUL-terminated UCS-2 or UTF-16 string.
*
* DEST and SRC must not overlap unless DEST ≡ SRC.
* 𝑑 and 𝑠 must not overlap unless 𝑑 ≡ 𝑠.
*
* @param dest is destination memory
* @param src is a NUL-terminated 16-bit string
* @return original dest
* @param d is destination memory
* @param s is a NUL-terminated 16-bit string
* @return original d
* @asyncsignalsafe
*/
char16_t *strcpy16(char16_t *dest, const char16_t *src) {
return memcpy(dest, src, (strlen16(src) + 1) * sizeof(char16_t));
char16_t *strcpy16(char16_t *d, const char16_t *s) {
return memcpy(d, s, (strlen16(s) + 1) * sizeof(char16_t));
}

View file

@ -31,7 +31,7 @@ typedef char16_t xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
*/
noasan size_t strlen16(const char16_t *s) {
size_t n;
xmm_t v, z = {0};
xmm_t z = {0};
unsigned m, k = (uintptr_t)s & 15;
const xmm_t *p = (const xmm_t *)((uintptr_t)s & -16);
if (IsAsan()) __asan_verify(s, 2);

View file

@ -50,7 +50,7 @@ static noasan axdx_t tprecode16to8_sse2(char *dst, size_t dstsize,
* Transcodes UTF-16 to UTF-8.
*
* This is a low-level function intended for the core runtime. Use
* utf16toutf8() for a much better API that uses malloc().
* utf16to8() for a much better API that uses malloc().
*
* @param dst is output buffer
* @param dstsize is bytes in dst

View file

@ -47,7 +47,7 @@ static inline noasan axdx_t tprecode8to16_sse2(char16_t *dst, size_t dstsize,
* Transcodes UTF-8 to UTF-16.
*
* This is a low-level function intended for the core runtime. Use
* utf8toutf16() for a much better API that uses malloc().
* utf8to16() for a much better API that uses malloc().
*
* @param dst is output buffer
* @param dstsize is shorts in dst

View file

@ -23,12 +23,11 @@
*
* 𝑑 and 𝑠 must not overlap unless 𝑑 ≡ 𝑠.
*
* @param 𝑑 is destination memory
* @param 𝑠 is a NUL-terminated string
* @return original dest
* @param d is destination memory
* @param s is a NUL-terminated string
* @return original d
* @asyncsignalsafe
*/
wchar_t *wcscpy(wchar_t *d, const wchar_t *s) {
memcpy(d, s, (wcslen(s) + 1) * sizeof(wchar_t));
return d;
return memcpy(d, s, (wcslen(s) + 1) * sizeof(wchar_t));
}

View file

@ -1,7 +1,7 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
@ -16,27 +16,28 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/str/str.h"
typedef wchar_t xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
/**
* Searches for substring.
* Returns length of NUL-terminated wide string.
*
* @param haystack is the search area, as a NUL-terminated string
* @param needle is the desired substring, also NUL-terminated
* @return pointer to first substring within haystack, or NULL
* @param s is non-null NUL-terminated wide string pointer
* @return number of wide characters (excluding NUL)
* @asyncsignalsafe
* @see memmem()
*/
wchar_t *wcsstr(const wchar_t *haystack, const wchar_t *needle) {
size_t i;
for (;;) {
for (i = 0;;) {
if (!needle[i]) return (/*unconst*/ wchar_t *)haystack;
if (!haystack[i]) break;
if (needle[i] != haystack[i]) break;
++i;
}
if (!*haystack++) break;
}
return NULL;
noasan size_t wcslen(const wchar_t *s) {
size_t n;
xmm_t z = {0};
unsigned m, k = (uintptr_t)s & 15;
const xmm_t *p = (const xmm_t *)((uintptr_t)s & -16);
if (IsAsan()) __asan_verify(s, 4);
m = __builtin_ia32_pmovmskb128(*p == z) >> k << k;
while (!m) m = __builtin_ia32_pmovmskb128(*++p == z);
n = (const wchar_t *)p + (__builtin_ctzl(m) >> 2) - s;
if (IsAsan()) __asan_verify(s, n);
return n;
}

View file

@ -30,6 +30,6 @@ int wcsncasecmp(const wchar_t *a, const wchar_t *b, size_t n) {
size_t i = 0;
unsigned x, y;
if (!n-- || a == b) return 0;
while ((x = tolower(a[i])) == (y = tolower(b[i])) && b[i] && i < n) ++i;
while ((x = towlower(a[i])) == (y = towlower(b[i])) && b[i] && i < n) ++i;
return x - y;
}

42
libc/str/wcsstr.c Normal file
View file

@ -0,0 +1,42 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/str.h"
/**
 * Finds first occurrence of a wide substring.
 *
 * Every starting offset in haystack is tried in order, comparing
 * needle against it one wide character at a time. An empty needle
 * matches at the very start of haystack.
 *
 * @param haystack is the NUL-terminated wide string to search
 * @param needle is the NUL-terminated wide string to look for
 * @return pointer to first match inside haystack, or NULL if absent
 * @asyncsignalsafe
 * @see memmem()
 */
wchar_t *wcsstr(const wchar_t *haystack, const wchar_t *needle) {
  const wchar_t *start = haystack;
  do {
    size_t k = 0;
    /* advance while both strings continue and still agree */
    while (needle[k] && start[k] && needle[k] == start[k]) {
      ++k;
    }
    /* needle fully consumed means it matched at this offset */
    if (!needle[k]) {
      return (/*unconst*/ wchar_t *)start;
    }
    /* the terminator offset is tried too, then the scan stops */
  } while (*start++);
  return NULL;
}