mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 19:43:32 +00:00
cc1732bc42
- Perform some housekeeping on scalar math function code - Import ARM's Optimized Routines for SIMD string processing - Upgrade to latest Chromium zlib and enable more SIMD optimizations
70 lines
2.9 KiB
C
70 lines
2.9 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2020 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for         │
│ any purpose with or without fee is hereby granted, provided that the         │
│ above copyright notice and this permission notice appear in all copies.      │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
│ PERFORMANCE OF THIS SOFTWARE.                                                │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/str/str.h"
|
||
#ifndef __aarch64__
|
||
|
||
// TODO(jart): ASAN support here is important.
|
||
|
||
// 16-byte vector of char with 1-byte alignment; used for stores to the
// destination, whose alignment is not known.
typedef char xmm_u __attribute__((__vector_size__(16), __aligned__(1)));
// 16-byte vector of char with 16-byte alignment; used for loads from
// source addresses that have already been aligned to 16 bytes.
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(16)));
|
||
|
||
#ifdef __x86_64__
|
||
/**
 * Copies 16-byte chunks from 𝑠 to 𝑑 for as long as a chunk holds no NUL.
 *
 * Each chunk is fetched with a 16-byte aligned vector load, so 𝑠+𝑖 must
 * be 16-byte aligned on entry. The chunk that contains the terminator is
 * not copied; the caller finishes it one byte at a time.
 *
 * @param 𝑑 is destination memory
 * @param 𝑠 is a NUL-terminated string
 * @param 𝑖 is byte offset at which copying starts
 * @return offset of the first 16-byte chunk of 𝑠 that contains a NUL
 */
static inline noasan size_t stpcpy_sse2(char *d, const char *s, size_t i) {
  xmm_t v, z = {0};
  for (;;) {
    // whole-chunk load may read bytes located after the terminator
    // NOTE(review): presumably the reason this is noasan -- confirm
    v = *(const xmm_t *)(s + i);
    // v == z sets every lane that holds NUL; pmovmskb collects the lane
    // sign bits, so a nonzero mask means this chunk has the terminator
    if (__builtin_ia32_pmovmskb128(v == z)) {
      break;
    }
    *(xmm_u *)(d + i) = v;  // unaligned store: only 𝑠+𝑖 is aligned
    i += 16;
  }
  return i;
}
|
||
#endif
|
||
|
||
/**
 * Copies bytes from 𝑠 to 𝑑 until a NUL is encountered.
 *
 * The terminating NUL is copied as well.
 *
 * @param 𝑑 is destination memory, with room for strlen(𝑠)+1 bytes
 * @param 𝑠 is a NUL-terminated string
 * @note 𝑑 and 𝑠 can't overlap
 * @return pointer to nul byte that was written to 𝑑
 * @asyncsignalsafe
 */
char *stpcpy(char *d, const char *s) {
  size_t i = 0;
#ifdef __x86_64__
  // copy bytewise until 𝑠+𝑖 reaches a 16-byte boundary
  for (; (uintptr_t)(s + i) & 15; ++i) {
    if (!(d[i] = s[i])) {
      return d + i;
    }
  }
  // copy whole 16-byte chunks, stopping at the one that holds the NUL
  i = stpcpy_sse2(d, s, i);
#endif
  // copy whatever remains, terminator included
  for (;;) {
    if (!(d[i] = s[i])) {
      return d + i;
    }
    ++i;
  }
}
|
||
|
||
#endif /* __aarch64__ */
|