Make more code aarch64 friendly

This commit is contained in:
Justine Tunney 2023-05-02 13:38:16 -07:00
parent ca2860947f
commit 2b73e72d59
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
568 changed files with 2197 additions and 1061 deletions

View file

@ -38,6 +38,7 @@ static dontinline antiquity int bcmp_sse(const char *p, const char *q,
return !!(a[0] | a[1]);
}
#ifdef __x86_64__
microarchitecture("avx") static int bcmp_avx(const char *p, const char *q,
size_t n) {
xmm_t a, b, c, d;
@ -67,6 +68,7 @@ microarchitecture("avx") static int bcmp_avx(const char *p, const char *q,
*(const xmm_t *)(p + n - 16) ^ *(const xmm_t *)(q + n - 16);
return !!(a[0] | a[1]);
}
#endif
/**
* Tests inequality of first 𝑛 bytes of 𝑝 and 𝑞.
@ -122,8 +124,10 @@ int bcmp(const void *a, const void *b, size_t n) {
__builtin_memcpy(&j, q + n - 4, 4);
return !!(i ^ j);
}
#ifdef __x86_64__
} else if (LIKELY(X86_HAVE(AVX))) {
return bcmp_avx(p, q, n);
#endif
} else {
return bcmp_sse(p, q, n);
}

View file

@ -21,6 +21,7 @@
#include "libc/intrin/bits.h"
#include "libc/nexgen32e/cachesize.h"
#include "libc/nexgen32e/cpuid4.internal.h"
#ifdef __x86_64__
static unsigned _getcachesize_cpuid4(int type, int level) {
unsigned i, k;
@ -55,3 +56,5 @@ unsigned _getcachesize(int type, int level) {
_unassert(level >= 1);
return _getcachesize_cpuid4(type, level);
}
#endif

View file

@ -45,6 +45,7 @@ void *GetZipCdir(const uint8_t *p, size_t n) {
i = n - 4;
asm("" : "+x"(pk));
do {
#ifdef __x86_64__
if (i >= 14) {
x = *(const v2di *)(p + i - 14);
if (!(__builtin_ia32_pmovmskb128(
@ -55,6 +56,7 @@ void *GetZipCdir(const uint8_t *p, size_t n) {
continue;
}
}
#endif
while (magic = READ32LE(p + i), magic != kZipCdir64LocatorMagic &&
magic != kZipCdirHdrMagic &&
i + 0x10000 + 0x1000 >= n && i > 0) {

View file

@ -60,6 +60,7 @@ noasan bool _isutf8(const void *data, size_t size) {
p = data;
e = p + size;
while (p < e) {
#ifdef __x86_64__
if (!((intptr_t)p & 15)) {
for (;;) {
if ((m = __builtin_ia32_pmovmskb128(*(xmm_t *)p >= (xmm_t){0}) ^
@ -75,6 +76,7 @@ noasan bool _isutf8(const void *data, size_t size) {
return true;
}
}
#endif
if (LIKELY((c = *p++ & 255) < 0200)) continue;
if (UNLIKELY(c < 0300)) return false;
switch (kUtf8Dispatch[c - 0300]) {

View file

@ -43,7 +43,7 @@ textstartup void *lz4cpy(void *dest, const void *blockdata, size_t blocksize) {
length += *ip;
} while (*ip++ == 255);
}
repmovsb(&op, &ip, length);
repmovsb((void **)&op, (const void **)&ip, length);
if (ip >= ipe) break;
offset = READ16LE(ip);
matchlen = token & fifteen;
@ -54,7 +54,7 @@ textstartup void *lz4cpy(void *dest, const void *blockdata, size_t blocksize) {
} while (*ip++ == 255);
}
match = op - offset;
repmovsb(&op, &match, (matchlen += 4));
repmovsb((void **)&op, (const void **)&match, (matchlen += 4));
}
return op;
}

View file

@ -34,6 +34,7 @@ static inline const unsigned char *memchr_pure(const unsigned char *s,
return 0;
}
#ifdef __x86_64__
noasan static inline const unsigned char *memchr_sse(const unsigned char *s,
unsigned char c,
size_t n) {
@ -57,6 +58,7 @@ noasan static inline const unsigned char *memchr_sse(const unsigned char *s,
}
return 0;
}
#endif
/**
* Returns pointer to first instance of character.
@ -68,6 +70,7 @@ noasan static inline const unsigned char *memchr_sse(const unsigned char *s,
* @asyncsignalsafe
*/
void *memchr(const void *s, int c, size_t n) {
#ifdef __x86_64__
const void *r;
if (!IsTiny() && X86_HAVE(SSE)) {
if (IsAsan()) __asan_verify(s, n);
@ -76,4 +79,7 @@ void *memchr(const void *s, int c, size_t n) {
r = memchr_pure(s, c, n);
}
return (void *)r;
#else
return memchr_pure(s, c, n);
#endif
}

View file

@ -34,6 +34,7 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
*/
noasan void *memmem(const void *haystack, size_t haystacklen,
const void *needle, size_t needlelen) {
#ifdef __x86_64__
char c;
xmm_t n, *v;
unsigned i, k, m;
@ -69,4 +70,17 @@ noasan void *memmem(const void *haystack, size_t haystacklen,
m &= ~(1 << k);
} while (m);
}
#else
size_t i, j;
if (!needlelen) return haystack;
if (needlelen > haystacklen) return 0;
for (i = 0; i < haystacklen; ++i) {
for (j = 0;; ++j) {
if (j == needlelen) return (/*unconst*/ char *)haystack + i;
if (i + j == haystacklen) break;
if (((char *)haystack)[i + j] != ((char *)needle)[j]) break;
}
}
return 0;
#endif
}

View file

@ -34,6 +34,7 @@ static inline const unsigned char *memrchr_pure(const unsigned char *s,
return 0;
}
#ifdef __x86_64__
noasan static inline const unsigned char *memrchr_sse(const unsigned char *s,
unsigned char c,
size_t n) {
@ -55,6 +56,7 @@ noasan static inline const unsigned char *memrchr_sse(const unsigned char *s,
}
return 0;
}
#endif
/**
* Returns pointer to last instance of character.
@ -66,6 +68,7 @@ noasan static inline const unsigned char *memrchr_sse(const unsigned char *s,
* @asyncsignalsafe
*/
void *memrchr(const void *s, int c, size_t n) {
#ifdef __x86_64__
const void *r;
if (!IsTiny() && X86_HAVE(SSE)) {
if (IsAsan()) __asan_verify(s, n);
@ -74,4 +77,7 @@ void *memrchr(const void *s, int c, size_t n) {
r = memrchr_pure(s, c, n);
}
return (void *)r;
#else
return memrchr_pure(s, c, n);
#endif
}

View file

@ -34,6 +34,7 @@ static inline const char16_t *memrchr16_pure(const char16_t *s, char16_t c,
return 0;
}
#ifdef __x86_64__
noasan static inline const char16_t *memrchr16_sse(const char16_t *s,
char16_t c, size_t n) {
size_t i;
@ -54,6 +55,7 @@ noasan static inline const char16_t *memrchr16_sse(const char16_t *s,
}
return 0;
}
#endif
/**
* Returns pointer to last instance of character.
@ -65,6 +67,7 @@ noasan static inline const char16_t *memrchr16_sse(const char16_t *s,
* @asyncsignalsafe
*/
void *memrchr16(const void *s, int c, size_t n) {
#ifdef __x86_64__
const void *r;
if (!IsTiny() && X86_HAVE(SSE)) {
if (IsAsan()) __asan_verify(s, n * 2);
@ -73,4 +76,7 @@ void *memrchr16(const void *s, int c, size_t n) {
r = memrchr16_pure(s, c, n);
}
return (void *)r;
#else
return memrchr16_pure(s, c, n);
#endif
}

View file

@ -33,6 +33,7 @@ static inline const unsigned char *rawmemchr_pure(const unsigned char *s,
}
}
#ifdef __x86_64__
noasan static inline const char *rawmemchr_sse(const char *s, unsigned char c) {
unsigned k;
unsigned m;
@ -51,6 +52,7 @@ noasan static inline const char *rawmemchr_sse(const char *s, unsigned char c) {
m = __builtin_ctzll(m);
return (const char *)p + m;
}
#endif
/**
* Returns pointer to first instance of character.
@ -60,6 +62,7 @@ noasan static inline const char *rawmemchr_sse(const char *s, unsigned char c) {
* @return is pointer to first instance of c
*/
void *rawmemchr(const void *s, int c) {
#ifdef __x86_64__
const void *r;
if (X86_HAVE(SSE)) {
if (IsAsan()) __asan_verify(s, 1);
@ -68,4 +71,7 @@ void *rawmemchr(const void *s, int c) {
r = rawmemchr_pure(s, c);
}
return (void *)r;
#else
return rawmemchr_pure(s, c);
#endif
}

View file

@ -105,7 +105,7 @@ char *strrchr(const char *, int) strlenesque;
void *memrchr(const void *, int, size_t) strlenesque;
char16_t *strrchr16(const char16_t *, int) strlenesque;
void *memrchr16(const void *, int, size_t) strlenesque;
wchar_t *wcsrchr(const wchar_t *, int) strlenesque;
wchar_t *wcsrchr(const wchar_t *, wchar_t) strlenesque;
void *wmemrchr(const wchar_t *, wchar_t, size_t) strlenesque;
char *strpbrk(const char *, const char *) strlenesque;
char16_t *strpbrk16(const char16_t *, const char16_t *) strlenesque;

View file

@ -36,6 +36,7 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
* @see strstr()
*/
noasan char *strcasestr(const char *haystack, const char *needle) {
#ifdef __x86_64__
char c;
xmm_t *p;
size_t i;
@ -68,4 +69,18 @@ noasan char *strcasestr(const char *haystack, const char *needle) {
if (!*haystack++) break;
}
return 0;
#else
/* Portable fallback: try a case-insensitive match of needle at each
   successive haystack position until the haystack's NUL is reached. */
size_t i;
if (haystack == needle || !*needle) return (/*unconst*/ char *)haystack;
for (;;) {
  for (i = 0;; ++i) {
    /* reached the end of needle: everything before it matched */
    if (!needle[i]) return (/*unconst*/ char *)haystack;
    if (!haystack[i]) break;
    if (kToLower[needle[i] & 255] != kToLower[haystack[i] & 255]) break;
  }
  if (!*haystack++) break;
}
return 0;
#endif
}

View file

@ -31,6 +31,7 @@ static inline const char *strchr_pure(const char *s, int c) {
}
}
#ifdef __x86_64__
noasan static inline const char *strchr_sse(const char *s, unsigned char c) {
unsigned k;
unsigned m;
@ -52,6 +53,7 @@ noasan static inline const char *strchr_sse(const char *s, unsigned char c) {
if (c && !*s) s = 0;
return s;
}
#endif
/**
* Returns pointer to first instance of character.
@ -64,6 +66,7 @@ noasan static inline const char *strchr_sse(const char *s, unsigned char c) {
* @vforksafe
*/
char *strchr(const char *s, int c) {
#ifdef __x86_64__
const char *r;
if (X86_HAVE(SSE)) {
if (IsAsan()) __asan_verify(s, 1);
@ -73,4 +76,7 @@ char *strchr(const char *s, int c) {
}
_unassert(!r || *r || !(c & 255));
return (char *)r;
#else
return strchr_pure(s, c);
#endif
}

View file

@ -31,6 +31,7 @@ static inline const char *strchrnul_pure(const char *s, int c) {
}
}
#ifdef __x86_64__
noasan static inline const char *strchrnul_sse(const char *s, unsigned char c) {
unsigned k;
unsigned m;
@ -49,6 +50,7 @@ noasan static inline const char *strchrnul_sse(const char *s, unsigned char c) {
}
return (const char *)p + __builtin_ctzl(m);
}
#endif
/**
* Returns pointer to first instance of character.
@ -61,6 +63,7 @@ noasan static inline const char *strchrnul_sse(const char *s, unsigned char c) {
* NUL terminator if c is not found
*/
char *strchrnul(const char *s, int c) {
#ifdef __x86_64__
const char *r;
if (X86_HAVE(SSE)) {
if (IsAsan()) __asan_verify(s, 1);
@ -70,4 +73,7 @@ char *strchrnul(const char *s, int c) {
}
_unassert((*r & 255) == (c & 255) || !*r);
return (char *)r;
#else
return strchrnul_pure(s, c);
#endif
}

View file

@ -30,6 +30,7 @@ typedef char16_t xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
* @asyncsignalsafe
*/
noasan size_t strlen16(const char16_t *s) {
#ifdef __x86_64__
size_t n;
xmm_t z = {0};
unsigned m, k = (uintptr_t)s & 15;
@ -40,4 +41,9 @@ noasan size_t strlen16(const char16_t *s) {
n = (const char16_t *)p + (__builtin_ctzl(m) >> 1) - s;
if (IsAsan()) __asan_verify(s, n * 2);
return n;
#else
size_t n = 0;
while (*s++) ++n;
return n;
#endif
}

View file

@ -36,6 +36,7 @@ typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
* @see memmem()
*/
noasan char *strstr(const char *haystack, const char *needle) {
#ifdef __x86_64__
xmm_t *p;
size_t i;
unsigned k, m;
@ -66,4 +67,18 @@ noasan char *strstr(const char *haystack, const char *needle) {
if (!*haystack++) break;
}
return 0;
#else
/* Portable fallback: try to match needle at each successive haystack
   position until the haystack's NUL is reached. */
size_t i;
if (haystack == needle || !*needle) return (/*unconst*/ char *)haystack;
for (;;) {
  for (i = 0;; ++i) {
    /* reached the end of needle: everything before it matched */
    if (!needle[i]) return (/*unconst*/ char *)haystack;
    if (!haystack[i]) break;
    if (needle[i] != haystack[i]) break;
  }
  if (!*haystack++) break;
}
return 0;
#endif
}

View file

@ -41,6 +41,7 @@ noasan static dontinline antiquity unsigned timingsafe_bcmp_sse(const char *p,
return w | w >> 32;
}
#ifdef __x86_64__
noasan static microarchitecture("avx") int timingsafe_bcmp_avx(const char *p,
const char *q,
size_t n) {
@ -74,6 +75,7 @@ noasan static microarchitecture("avx") int timingsafe_bcmp_avx(const char *p,
w = a[0] | a[1];
return w | w >> 32;
}
#endif
/**
* Tests inequality of first 𝑛 bytes of 𝑝 and 𝑞.
@ -140,11 +142,12 @@ int timingsafe_bcmp(const void *a, const void *b, size_t n) {
__asan_verify(a, n);
__asan_verify(b, n);
}
#ifdef __x86_64__
if (X86_HAVE(AVX)) {
return timingsafe_bcmp_avx(p, q, n);
} else {
return timingsafe_bcmp_sse(p, q, n);
}
#endif
return timingsafe_bcmp_sse(p, q, n);
}
} else if (n >= 4) {
__builtin_memcpy(&u0, p, 4);

View file

@ -1,26 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_STR_TPDECODE_H_
#define COSMOPOLITAN_LIBC_STR_TPDECODE_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
int tpdecode(const char *, wint_t *) paramsnonnull((1)) libcesque;
#if defined(__MNO_RED_ZONE__) && defined(__GNUC__) && !defined(__STRICT_ANSI__)
#define tpdecode(S, OUT) __tpdecode(S, OUT)
/**
 * Inline front end for tpdecode() that handles 7-bit input without a call.
 *
 * @param s points at the byte sequence to decode
 * @param out receives the decoded value on the fast path
 * @return 1 on the fast path; otherwise whatever the called tpdecode
 *     routine leaves in the register bound by the "a" constraint
 */
forceinline int __tpdecode(const char *s, wint_t *out) {
int ax;
/* a single byte in [0x00,0x7f] decodes to itself */
if (0 <= *s && *s <= 0x7f) {
*out = *s;
return 1;
}
/* slow path: call the tpdecode symbol directly, handing it s and out
   through the "D" and "S" register constraints; only "cc" is listed
   as clobbered */
/* NOTE(review): the six bytes at s are declared as an "=m" output even
   though s is const -- presumably meant to tell the compiler the asm
   touches that memory; confirm the intent */
asm("call\ttpdecode"
: "=a"(ax), "=m"(*(char(*)[6])s)
: "D"(s), "S"(out)
: "cc");
return ax;
}
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_STR_TPDECODE_H_ */

View file

@ -1,41 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/errno.h"
#include "libc/str/str.h"
#include "libc/str/tpdecode.internal.h"
#include "libc/str/tpdecodecb.internal.h"
/* TODO(jart): DELETE */
/* Callback for tpdecodecb(): reads byte number i of the buffer behind arg. */
forceinline int getbyte(void *arg, uint32_t i) {
  const unsigned char *bytes = (const unsigned char *)arg;
  return bytes[i];
}
/**
 * Decodes one Thompson-Pike varint from a string.
 *
 * The work is delegated to tpdecodecb(), which is given the first byte
 * of the string plus a callback for reading the bytes that follow it.
 * The function name is parenthesized in the definition so a
 * function-like macro named tpdecode is not expanded here.
 *
 * @param s is a NUL-terminated string
 * @param out is passed through to tpdecodecb() to receive the result
 * @return number of bytes successfully consumed or -1 w/ errno
 * @note synchronization is performed
 * @see libc/str/tpdecodecb.internal.h (for implementation)
 * @deprecated
 */
int(tpdecode)(const char *s, wint_t *out) {
  unsigned char lead = (unsigned char)s[0];
  void *bytes = (void *)s;
  return tpdecodecb(out, lead, getbyte, bytes);
}

View file

@ -1,29 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_STR_TPENCODE_H_
#define COSMOPOLITAN_LIBC_STR_TPENCODE_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
unsigned tpencode(char *, size_t, wint_t, bool32) paramsnonnull() libcesque;
#ifndef __STRICT_ANSI__
#define tpencode(...) __tpencode(__VA_ARGS__)
/**
 * Inline front end for tpencode() that encodes 7-bit values in place.
 *
 * Values in [0x00,0x7f] are stored as one byte, unless awesome is set
 * and the value is below 32, in which case the two-byte form 0xc0,
 * 0x80|wc is stored when at least two bytes are available. Every other
 * case is forwarded to the out-of-line tpencode().
 *
 * @param p is where the encoded bytes are stored
 * @param size is the number of bytes available at p
 * @param wc is the value to encode
 * @param awesome selects the two-byte form for values below 32
 * @return number of bytes written
 */
forceinline unsigned __tpencode(char *p, size_t size, wint_t wc,
                                bool32 awesome) {
  /* no room at all, or not 7-bit: let the real function deal with it */
  if (size < 1 || !(0x00 <= wc && wc <= 0x7f)) {
    return (tpencode)(p, size, wc, awesome);
  }
  if (!awesome || wc >= 32) {
    p[0] = wc;
    return 1;
  }
  if (size >= 2) {
    p[0] = 0xc0;
    p[1] = 0x80;
    p[1] |= wc;
    return 2;
  }
  /* awesome control code but only one byte of room */
  return (tpencode)(p, size, wc, awesome);
}
#endif
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_STR_TPENCODE_H_ */

View file

@ -1,56 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/tpenc.h"
#include "libc/str/internal.h"
#include "libc/str/tpencode.internal.h"
/* TODO: DELETE */
/**
 * Thompson-Pike Varint Encoder.
 *
 * Implementation Details: The header macro should ensure this function
 * is only called for non-ASCII, or DCE'd entirely. In addition to that
 * this function makes a promise to not clobber any registers but %rax.
 *
 * If the encoded sequence is longer than size, then only the bytes
 * that fit are stored and the returned count reflects that.
 *
 * @param p is what wc gets encoded to
 * @param size is the number of bytes available in p
 * @param wc is a 32-bit integer
 * @param awesome mode enables numbers the IETF unilaterally banned
 * @return number of bytes written
 * @note this encoding was designed on a napkin in a new jersey diner
 * @deprecated
 */
unsigned(tpencode)(char *p, size_t size, wint_t wc, bool32 awesome) {
  int i;
  unsigned long w;
  /* awesome mode stores values below 32 in the two-byte form */
  if ((0 <= wc && wc < 32) && awesome && size >= 2) {
    p[0] = 0xc0;
    p[1] = 0x80;
    p[1] |= wc;
    return 2;
  }
  i = 0;
  w = _tpenc(wc);
  /* emit the word returned by _tpenc() one byte at a time, lowest byte
     first, stopping early once the output space is used up */
  do {
    if (!size--) break;
    p[i++] = w & 0xff;
  } while (w >>= 8);
  return i;
}

View file

@ -30,6 +30,7 @@ typedef wchar_t xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
* @asyncsignalsafe
*/
noasan size_t wcslen(const wchar_t *s) {
#ifdef __x86_64__
size_t n;
xmm_t z = {0};
unsigned m, k = (uintptr_t)s & 15;
@ -40,4 +41,9 @@ noasan size_t wcslen(const wchar_t *s) {
n = (const wchar_t *)p + (__builtin_ctzl(m) >> 2) - s;
if (IsAsan()) __asan_verify(s, n);
return n;
#else
size_t n = 0;
while (*s++) ++n;
return n;
#endif
}

View file

@ -34,6 +34,7 @@ static inline const wchar_t *wmemrchr_pure(const wchar_t *s, wchar_t c,
return 0;
}
#ifdef __x86_64__
noasan static inline const wchar_t *wmemrchr_sse(const wchar_t *s, wchar_t c,
size_t n) {
size_t i;
@ -54,6 +55,7 @@ noasan static inline const wchar_t *wmemrchr_sse(const wchar_t *s, wchar_t c,
}
return 0;
}
#endif
/**
* Returns pointer to last instance of character.
@ -65,6 +67,7 @@ noasan static inline const wchar_t *wmemrchr_sse(const wchar_t *s, wchar_t c,
* @asyncsignalsafe
*/
void *wmemrchr(const wchar_t *s, wchar_t c, size_t n) {
#ifdef __x86_64__
size_t bytes;
const void *r;
if (!IsTiny() && X86_HAVE(SSE)) {
@ -77,4 +80,7 @@ void *wmemrchr(const wchar_t *s, wchar_t c, size_t n) {
r = wmemrchr_pure(s, c, n);
}
return (void *)r;
#else
return wmemrchr_pure(s, c, n);
#endif
}