mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-01 03:53:33 +00:00
7c83f4abc8
- wcsstr() is now linearly complex - strstr16() is now linearly complex - strstr() is now vectorized on aarch64 (10x) - strstr() now uses KMP on pathological cases - memmem() is now vectorized on aarch64 (10x) - memmem() now uses KMP on pathological cases - Disable shared_ptr::owner_before until fixed - Make iswlower(), iswupper() consistent with glibc - Remove figure space from iswspace() implementation - Include line and paragraph separator in iswcntrl() - Use Musl wcwidth(), iswalpha(), iswpunct(), towlower(), towupper()
129 lines
5.1 KiB
C
129 lines
5.1 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for         │
│ any purpose with or without fee is hereby granted, provided that the         │
│ above copyright notice and this permission notice appear in all copies.      │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
│ PERFORMANCE OF THIS SOFTWARE.                                                │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/str/str.h"
#include "libc/str/kmp.h"
#include "third_party/aarch64/arm_neon.internal.h"
#include "third_party/intel/immintrin.internal.h"

/**
|
|
* Searches for substring.
|
|
*
|
|
* This function offers assurances against pathological cases, using KMP
|
|
* if no progress is being made on the O(nm) vectorized fast path. It is
|
|
* important to note that, if `needle` is untrusted, that it not be long
|
|
* enough to overflow the stack. That's because KMP needs to allocate an
|
|
* array of longs the same length as `needle` and it needs to do it with
|
|
* stack memory since POSIX requires this function to be safe to call in
|
|
* signal handlers.
|
|
*
|
|
* @param haystack is the search area, as a NUL-terminated string
|
|
* @param needle is the desired substring, also NUL-terminated
|
|
* @return pointer to first substring within haystack, or NULL
|
|
* @asyncsignalsafe
|
|
* @see strcasestr()
|
|
* @see memmem()
|
|
*/
|
|
__vex char *strstr(const char *haystack, const char *needle) {
|
|
if (haystack == needle || !*needle)
|
|
return (char *)haystack;
|
|
#if defined(__x86_64__) && !defined(__chibicc__)
|
|
size_t i;
|
|
unsigned k, m;
|
|
const __m128i *p;
|
|
long progress = 0;
|
|
__m128i v, n, z = _mm_setzero_si128();
|
|
const char *hay = haystack;
|
|
n = _mm_set1_epi8(*needle);
|
|
for (;;) {
|
|
k = (uintptr_t)hay & 15;
|
|
p = (const __m128i *)((uintptr_t)hay & -16);
|
|
v = _mm_load_si128(p);
|
|
m = _mm_movemask_epi8(
|
|
_mm_or_si128(_mm_cmpeq_epi8(v, z), _mm_cmpeq_epi8(v, n)));
|
|
m >>= k;
|
|
m <<= k;
|
|
while (!m) {
|
|
progress += 16;
|
|
v = _mm_load_si128(++p);
|
|
m = _mm_movemask_epi8(
|
|
_mm_or_si128(_mm_cmpeq_epi8(v, z), _mm_cmpeq_epi8(v, n)));
|
|
}
|
|
int offset = __builtin_ctzl(m);
|
|
progress += offset;
|
|
hay = (const char *)p + offset;
|
|
for (i = 0;; ++i) {
|
|
if (--progress <= -512)
|
|
goto OfferPathologicalAssurances;
|
|
if (!needle[i])
|
|
return (/*unconst*/ char *)hay;
|
|
if (!hay[i])
|
|
break;
|
|
if (needle[i] != hay[i])
|
|
break;
|
|
}
|
|
if (!*hay++)
|
|
break;
|
|
}
|
|
return 0;
|
|
OfferPathologicalAssurances:
|
|
#elif defined(__aarch64__) && defined(__ARM_NEON)
|
|
size_t i;
|
|
const char *hay = haystack;
|
|
uint8x16_t n = vdupq_n_u8(*needle);
|
|
uint8x16_t z = vdupq_n_u8(0);
|
|
long progress = 0;
|
|
for (;;) {
|
|
int k = (uintptr_t)hay & 15;
|
|
hay = (const char *)((uintptr_t)hay & -16);
|
|
uint8x16_t v = vld1q_u8((const uint8_t *)hay);
|
|
uint8x16_t cmp = vorrq_u8(vceqq_u8(v, z), vceqq_u8(v, n));
|
|
uint8x8_t mask = vshrn_n_u16(vreinterpretq_u16_u8(cmp), 4);
|
|
uint64_t m;
|
|
vst1_u8((uint8_t *)&m, mask);
|
|
m >>= k * 4;
|
|
m <<= k * 4;
|
|
while (!m) {
|
|
hay += 16;
|
|
progress += 16;
|
|
v = vld1q_u8((const uint8_t *)hay);
|
|
cmp = vorrq_u8(vceqq_u8(v, z), vceqq_u8(v, n));
|
|
mask = vshrn_n_u16(vreinterpretq_u16_u8(cmp), 4);
|
|
vst1_u8((uint8_t *)&m, mask);
|
|
}
|
|
int offset = __builtin_ctzll(m) >> 2;
|
|
progress += offset;
|
|
hay += offset;
|
|
for (i = 0;; ++i) {
|
|
if (--progress <= -512)
|
|
goto OfferPathologicalAssurances;
|
|
if (!needle[i])
|
|
return (/*unconst*/ char *)hay;
|
|
if (!hay[i])
|
|
break;
|
|
if (needle[i] != hay[i])
|
|
break;
|
|
}
|
|
if (!*hay++)
|
|
break;
|
|
}
|
|
return 0;
|
|
OfferPathologicalAssurances:
|
|
#endif
|
|
return __memmem_kmp(haystack, strlen(haystack), needle, strlen(needle));
|
|
}
|