From a4601a24d34b451d15ff83bfc324c098c5ebec0f Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Thu, 23 Jun 2022 10:21:07 -0700 Subject: [PATCH] Perform some code cleanup --- libc/calls/nanosleep-xnu.c | 2 +- libc/calls/utimensat-xnu.c | 8 +- libc/intrin/futex.internal.h | 12 + .../div1000int64.S => intrin/futex_wait.c} | 41 +- .../doc/memrchr.c => intrin/futex_wake.c} | 63 ++- libc/intrin/intrin.mk | 2 + libc/intrin/pthread.h | 2 + libc/intrin/pthread_mutex_destroy.c | 7 + libc/intrin/pthread_mutex_lock.c | 26 +- libc/intrin/pthread_mutex_unlock.c | 2 +- libc/intrin/pthread_mutex_wake.c | 18 +- libc/intrin/sched_yield.S | 1 + libc/intrin/wait0.c | 5 +- libc/log/vflogf.c | 6 +- libc/nexgen32e/bsf.h | 14 - libc/nexgen32e/bsr.h | 14 - libc/nexgen32e/cachesize.h | 5 +- libc/nexgen32e/div1000000000int64.S | 30 -- libc/nexgen32e/div1000000int64.S | 29 -- libc/nexgen32e/div10000int64.S | 29 -- libc/nexgen32e/div100int64.S | 34 -- libc/nexgen32e/div10int64.S | 29 -- libc/nexgen32e/doc/README.txt | 1 - libc/nexgen32e/kcpuids.S | 12 + libc/nexgen32e/memeqmask.S | 48 --- libc/nexgen32e/memrchr16.S | 62 --- libc/nexgen32e/memrchr32.S | 62 --- libc/nexgen32e/nexgen32e.h | 17 - libc/nexgen32e/rem1000000000int64.S | 38 -- libc/nexgen32e/rem1000000int64.S | 38 -- libc/nexgen32e/rem10000int64.S | 38 -- libc/nexgen32e/rem1000int64.S | 38 -- libc/nexgen32e/rem100int64.S | 38 -- libc/nexgen32e/rem10int64.S | 39 -- libc/nexgen32e/slowcall.S | 83 ---- libc/nexgen32e/slowcall.h | 24 -- libc/nexgen32e/strcpyzbw.S | 33 -- libc/nexgen32e/strsak.S | 406 ------------------ libc/nexgen32e/strstr-sse42.S | 41 -- libc/nexgen32e/strstr.inc | 68 --- libc/nexgen32e/strstr16-sse42.S | 26 -- libc/str/memchr.c | 4 +- libc/str/memrchr.c | 4 +- .../doc/strcmp-avx2.c => str/memrchr16.c} | 87 ++-- libc/str/str.h | 2 - libc/str/strcmp.c | 5 +- libc/str/strnlen.c | 8 +- .../sidiv_test.c => libc/str/strnlen_s.c | 66 +-- libc/str/wmemrchr.c | 76 ++++ libc/thread/create.c | 2 + libc/thread/join.c | 1 - 
libc/thread/pthread_key_destruct.c | 2 +- libc/thread/wait.c | 44 +- libc/thread/zombie.c | 1 + libc/time/dsleep.c | 2 +- test/libc/calls/seccomp_test.c | 17 +- test/libc/intrin/pthread_mutex_lock_test.c | 6 +- test/libc/{str => intrin}/strlen_test.c | 4 +- test/libc/nexgen32e/memeqmask_test.c | 101 ----- test/libc/{nexgen32e => str}/memrchr16_test.c | 3 + test/libc/str/strnlen_test.c | 0 .../libc/str/wmemrchr_test.c | 53 +-- third_party/stb/stb_image_write.c | 14 +- 63 files changed, 350 insertions(+), 1643 deletions(-) create mode 100644 libc/intrin/futex.internal.h rename libc/{nexgen32e/div1000int64.S => intrin/futex_wait.c} (58%) rename libc/{nexgen32e/doc/memrchr.c => intrin/futex_wake.c} (64%) delete mode 100644 libc/nexgen32e/div1000000000int64.S delete mode 100644 libc/nexgen32e/div1000000int64.S delete mode 100644 libc/nexgen32e/div10000int64.S delete mode 100644 libc/nexgen32e/div100int64.S delete mode 100644 libc/nexgen32e/div10int64.S delete mode 100644 libc/nexgen32e/doc/README.txt delete mode 100644 libc/nexgen32e/memeqmask.S delete mode 100644 libc/nexgen32e/memrchr16.S delete mode 100644 libc/nexgen32e/memrchr32.S delete mode 100644 libc/nexgen32e/rem1000000000int64.S delete mode 100644 libc/nexgen32e/rem1000000int64.S delete mode 100644 libc/nexgen32e/rem10000int64.S delete mode 100644 libc/nexgen32e/rem1000int64.S delete mode 100644 libc/nexgen32e/rem100int64.S delete mode 100644 libc/nexgen32e/rem10int64.S delete mode 100644 libc/nexgen32e/slowcall.S delete mode 100644 libc/nexgen32e/slowcall.h delete mode 100644 libc/nexgen32e/strcpyzbw.S delete mode 100644 libc/nexgen32e/strsak.S delete mode 100644 libc/nexgen32e/strstr-sse42.S delete mode 100644 libc/nexgen32e/strstr.inc delete mode 100644 libc/nexgen32e/strstr16-sse42.S rename libc/{nexgen32e/doc/strcmp-avx2.c => str/memrchr16.c} (53%) rename test/libc/nexgen32e/sidiv_test.c => libc/str/strnlen_s.c (59%) create mode 100644 libc/str/wmemrchr.c rename test/libc/{str => intrin}/strlen_test.c 
(98%) delete mode 100644 test/libc/nexgen32e/memeqmask_test.c rename test/libc/{nexgen32e => str}/memrchr16_test.c (94%) create mode 100644 test/libc/str/strnlen_test.c rename libc/nexgen32e/doc/cescapec.c => test/libc/str/wmemrchr_test.c (66%) diff --git a/libc/calls/nanosleep-xnu.c b/libc/calls/nanosleep-xnu.c index 6517cfcf4..fd279330a 100644 --- a/libc/calls/nanosleep-xnu.c +++ b/libc/calls/nanosleep-xnu.c @@ -24,7 +24,7 @@ int sys_nanosleep_xnu(const struct timespec *req, struct timespec *rem) { long millis; - millis = div1000int64(req->tv_nsec); + millis = req->tv_nsec / 1000; millis = MAX(1, millis); return sys_select(0, 0, 0, 0, &(struct timeval){req->tv_sec, millis}); } diff --git a/libc/calls/utimensat-xnu.c b/libc/calls/utimensat-xnu.c index fb45e8085..5f585a810 100644 --- a/libc/calls/utimensat-xnu.c +++ b/libc/calls/utimensat-xnu.c @@ -41,19 +41,19 @@ int sys_utimensat_xnu(int dirfd, const char *path, const struct timespec ts[2], tv[0] = now; } else if (ts[0].tv_nsec == UTIME_OMIT) { tv[0].tv_sec = st.st_atim.tv_sec; - tv[0].tv_usec = div1000int64(st.st_atim.tv_nsec); + tv[0].tv_usec = st.st_atim.tv_nsec / 1000; } else { tv[0].tv_sec = ts[0].tv_sec; - tv[0].tv_usec = div1000int64(ts[0].tv_nsec); + tv[0].tv_usec = ts[0].tv_nsec / 1000; } if (ts[1].tv_nsec == UTIME_NOW) { tv[1] = now; } else if (ts[1].tv_nsec == UTIME_OMIT) { tv[1].tv_sec = st.st_mtim.tv_sec; - tv[1].tv_usec = div1000int64(st.st_mtim.tv_nsec); + tv[1].tv_usec = st.st_mtim.tv_nsec / 1000; } else { tv[1].tv_sec = ts[1].tv_sec; - tv[1].tv_usec = div1000int64(ts[1].tv_nsec); + tv[1].tv_usec = ts[1].tv_nsec / 1000; } } else { tv[0] = now; diff --git a/libc/intrin/futex.internal.h b/libc/intrin/futex.internal.h new file mode 100644 index 000000000..cb4d4714a --- /dev/null +++ b/libc/intrin/futex.internal.h @@ -0,0 +1,12 @@ +#ifndef COSMOPOLITAN_LIBC_INTRIN_FUTEX_INTERNAL_H_ +#define COSMOPOLITAN_LIBC_INTRIN_FUTEX_INTERNAL_H_ +#include "libc/calls/struct/timespec.h" +#if !(__ASSEMBLER__ + 
__LINKER__ + 0) +COSMOPOLITAN_C_START_ + +int _futex_wait(void *, int, struct timespec *); +int _futex_wake(void *, int); + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_LIBC_INTRIN_FUTEX_INTERNAL_H_ */ diff --git a/libc/nexgen32e/div1000int64.S b/libc/intrin/futex_wait.c similarity index 58% rename from libc/nexgen32e/div1000int64.S rename to libc/intrin/futex_wait.c index 6d31784d2..10259f05e 100644 --- a/libc/nexgen32e/div1000int64.S +++ b/libc/intrin/futex_wait.c @@ -1,7 +1,7 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,14 +16,29 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" +#include "libc/bits/asmflag.h" +#include "libc/calls/strace.internal.h" +#include "libc/calls/struct/timespec.h" +#include "libc/intrin/describeflags.internal.h" +#include "libc/intrin/futex.internal.h" +#include "libc/str/str.h" +#include "libc/sysv/consts/futex.h" +#include "libc/sysv/consts/nr.h" -// Divides 64-bit signed integer by 1,000. 
-// -// @param rdi is number to divide -// @return quotient -div1000int64: - mov $0x7,%cl - movabs $0x20c49ba5e353f7cf,%rdx - jmp tinydivsi - .endfn div1000int64,globl +privileged int _futex_wait(void *addr, int expect, struct timespec *timeout) { + int ax; + bool cf; + char buf[45]; + asm volatile(CFLAG_ASM("mov\t%6,%%r10\n\t" + "clc\n\t" + "syscall") + : CFLAG_CONSTRAINT(cf), "=a"(ax) + : "1"(__NR_futex), "D"(addr), "S"(FUTEX_WAIT), "d"(expect), + "g"(timeout) + : "rcx", "r10", "r11", "memory"); + if (cf) ax = -ax; + STRACE("futex(%p, FUTEX_WAIT, %d, %s) → %s", addr, expect, + DescribeTimespec(buf, sizeof(buf), 0, timeout), + ax ? strerrno(-ax) : "0"); + return ax; +} diff --git a/libc/nexgen32e/doc/memrchr.c b/libc/intrin/futex_wake.c similarity index 64% rename from libc/nexgen32e/doc/memrchr.c rename to libc/intrin/futex_wake.c index c28d6db22..974125299 100644 --- a/libc/nexgen32e/doc/memrchr.c +++ b/libc/intrin/futex_wake.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,39 +16,34 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/bits/asmflag.h" +#include "libc/calls/strace.internal.h" +#include "libc/fmt/itoa.h" +#include "libc/intrin/futex.internal.h" +#include "libc/str/str.h" +#include "libc/sysv/consts/futex.h" +#include "libc/sysv/consts/nr.h" -#define N 32 -typedef uint8_t uint8_v _Vector_size(N); - -/** - * Searches for last instance of character in memory region. 
- * - * @param s is binary data to search - * @param c is treated as unsigned char - * @param n is byte length of s - * @return address of last c in s, or NULL if not found - */ -void *memrchr(const void *s, int c, size_t n) { - unsigned char ch = (unsigned char)c; - const unsigned char *p = (const unsigned char *)s; - if (n >= 32 && CheckAvx2()) { - uint8_v cv; - __builtin_memset(&cv, ch, sizeof(cv)); - do { - uint32_t skip; - uint8_v sv, tv; - memcpy(&sv, s + n - N, N); - asm("vpcmpeqb\t%2,%3,%1\n\t" - "vpmovmskb\t%1,%0\n\t" - "lzcnt\t%0,%0" - : "=r"(skip), "=x"(tv) - : "x"(sv), "x"(cv)); - n -= skip; - if (skip != 32) break; - } while (n >= 32); +static const char *FormatFutexWakeResult(char buf[12], int ax) { + if (ax >= 0) { + FormatInt32(buf, ax); + return buf; + } else { + return strerrno(-ax); } - while (n--) { - if (p[n] == ch) return (/* unconst */ void *)&p[n]; - } - return NULL; +} + +privileged int _futex_wake(void *addr, int count) { + int ax; + bool cf; + char buf[12]; + asm volatile(CFLAG_ASM("clc\n\t" + "syscall") + : CFLAG_CONSTRAINT(cf), "=a"(ax) + : "1"(__NR_futex), "D"(addr), "S"(FUTEX_WAKE), "d"(count) + : "rcx", "r11", "memory"); + if (cf) ax = -ax; + STRACE("futex(%p, FUTEX_WAKE, %d) → %s", addr, count, + FormatFutexWakeResult(buf, ax)); + return ax; } diff --git a/libc/intrin/intrin.mk b/libc/intrin/intrin.mk index 6c6a6cc46..dc5434378 100644 --- a/libc/intrin/intrin.mk +++ b/libc/intrin/intrin.mk @@ -73,6 +73,8 @@ o/$(MODE)/libc/intrin/kprintf.greg.o: \ -fno-stack-protector # synchronization primitives are intended to be magic free +o/$(MODE)/libc/intrin/futex_wait.o \ +o/$(MODE)/libc/intrin/futex_wake.o \ o/$(MODE)/libc/intrin/gettid.greg.o \ o/$(MODE)/libc/intrin/pthread_mutex_lock.o \ o/$(MODE)/libc/intrin/pthread_mutex_unlock.o \ diff --git a/libc/intrin/pthread.h b/libc/intrin/pthread.h index 487b214a1..684fa29a4 100644 --- a/libc/intrin/pthread.h +++ b/libc/intrin/pthread.h @@ -126,6 +126,7 @@ void 
*pthread_getspecific(pthread_key_t); !atomic_exchange(&(mutex)->lock, 1)) \ ? 0 \ : pthread_mutex_lock(mutex)) +/* #define pthread_mutex_unlock(mutex) \ ((mutex)->attr == PTHREAD_MUTEX_NORMAL \ ? (atomic_store_explicit(&(mutex)->lock, 0, memory_order_relaxed), \ @@ -134,6 +135,7 @@ void *pthread_getspecific(pthread_key_t); _pthread_mutex_wake(mutex)), \ 0) \ : pthread_mutex_unlock(mutex)) +*/ #endif int _pthread_mutex_wake(pthread_mutex_t *) hidden; diff --git a/libc/intrin/pthread_mutex_destroy.c b/libc/intrin/pthread_mutex_destroy.c index 07fd61d97..5633c7780 100644 --- a/libc/intrin/pthread_mutex_destroy.c +++ b/libc/intrin/pthread_mutex_destroy.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/errno.h" #include "libc/intrin/pthread.h" #include "libc/str/str.h" @@ -24,6 +25,12 @@ * @return 0 on success, or error number on failure */ int pthread_mutex_destroy(pthread_mutex_t *mutex) { + int rc; + if (!mutex->lock && !mutex->waits) { + rc = 0; + } else { + rc = EDEADLK; + } bzero(mutex, sizeof(*mutex)); - return 0; + return rc; } diff --git a/libc/intrin/pthread_mutex_lock.c b/libc/intrin/pthread_mutex_lock.c index 1becaa669..b0399f583 100644 --- a/libc/intrin/pthread_mutex_lock.c +++ b/libc/intrin/pthread_mutex_lock.c @@ -21,6 +21,7 @@ #include "libc/calls/calls.h" #include "libc/dce.h" #include "libc/errno.h" +#include "libc/intrin/futex.internal.h" #include "libc/intrin/pthread.h" #include "libc/intrin/spinlock.h" #include "libc/linux/futex.h" @@ -28,38 +29,15 @@ #include "libc/sysv/consts/futex.h" #include "libc/sysv/consts/nr.h" -static inline int FutexWait(void *addr, int expect, struct timespec *timeout) { - int ax; - bool cf; - asm volatile(CFLAG_ASM("mov\t%6,%%r10\n\t" - "clc\n\t" - "syscall") - : CFLAG_CONSTRAINT(cf), "=a"(ax) - : "1"(__NR_futex), "D"(addr), "S"(FUTEX_WAIT), "d"(expect), - "g"(timeout) 
- : "rcx", "r10", "r11", "memory"); - if (cf) ax = -ax; - return ax; -} - static int pthread_mutex_lock_spin(pthread_mutex_t *mutex, int tries) { volatile int i; - struct timespec ts; if (tries < 7) { for (i = 0; i != 1 << tries; i++) { } tries++; } else if (IsLinux() || IsOpenbsd()) { atomic_fetch_add(&mutex->waits, 1); - if (tries < 28) { - ts.tv_sec = 0; - ts.tv_nsec = 4 << tries; - tries++; - } else { - ts.tv_sec = 1; - ts.tv_nsec = 0; - } - FutexWait(&mutex->lock, 1, &ts); + _futex_wait(&mutex->lock, 1, &(struct timespec){1}); atomic_fetch_sub(&mutex->waits, 1); } else { sched_yield(); diff --git a/libc/intrin/pthread_mutex_unlock.c b/libc/intrin/pthread_mutex_unlock.c index 726cf72ab..2556c045f 100644 --- a/libc/intrin/pthread_mutex_unlock.c +++ b/libc/intrin/pthread_mutex_unlock.c @@ -41,7 +41,7 @@ int(pthread_mutex_unlock)(pthread_mutex_t *mutex) { case PTHREAD_MUTEX_NORMAL: atomic_store_explicit(&mutex->lock, 0, memory_order_relaxed); if ((IsLinux() || IsOpenbsd()) && - atomic_load_explicit(&mutex->waits, memory_order_relaxed)) { + atomic_load_explicit(&mutex->waits, memory_order_relaxed) > 0) { _pthread_mutex_wake(mutex); } return 0; diff --git a/libc/intrin/pthread_mutex_wake.c b/libc/intrin/pthread_mutex_wake.c index aea4699ef..e6d0d40dd 100644 --- a/libc/intrin/pthread_mutex_wake.c +++ b/libc/intrin/pthread_mutex_wake.c @@ -16,23 +16,9 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/asmflag.h" +#include "libc/intrin/futex.internal.h" #include "libc/intrin/pthread.h" -#include "libc/sysv/consts/futex.h" -#include "libc/sysv/consts/nr.h" - -static inline int FutexWake(void *addr, int count) { - int ax; - bool cf; - asm volatile(CFLAG_ASM("clc\n\t" - "syscall") - : CFLAG_CONSTRAINT(cf), "=a"(ax) - : "1"(__NR_futex), "D"(addr), "S"(FUTEX_WAKE), "d"(count) - : "rcx", "r11", "memory"); - if (cf) ax = -ax; - return ax; -} int _pthread_mutex_wake(pthread_mutex_t *mutex) { - return FutexWake(&mutex->lock, 1); + return _futex_wake(&mutex->lock, 1); } diff --git a/libc/intrin/sched_yield.S b/libc/intrin/sched_yield.S index 5efb639cf..ab6384fff 100644 --- a/libc/intrin/sched_yield.S +++ b/libc/intrin/sched_yield.S @@ -30,6 +30,7 @@ sched_yield: testb IsXnu() jz 1f pause + xor %eax,%eax ret #endif diff --git a/libc/intrin/wait0.c b/libc/intrin/wait0.c index 19883467a..d5ba99673 100644 --- a/libc/intrin/wait0.c +++ b/libc/intrin/wait0.c @@ -19,6 +19,7 @@ #include "libc/bits/atomic.h" #include "libc/calls/calls.h" #include "libc/dce.h" +#include "libc/intrin/futex.internal.h" #include "libc/intrin/wait0.internal.h" #include "libc/linux/futex.h" @@ -34,8 +35,8 @@ void _wait0(int *ptid) { for (;;) { if (!(x = atomic_load_explicit(ptid, memory_order_relaxed))) { break; - } else if (IsLinux()) { - LinuxFutexWait(ptid, x, 0); + } else if (IsLinux() || IsOpenbsd()) { + _futex_wait(ptid, x, &(struct timespec){2}); } else { sched_yield(); } diff --git a/libc/log/vflogf.c b/libc/log/vflogf.c index c89a7a3d8..9d4b4e191 100644 --- a/libc/log/vflogf.c +++ b/libc/log/vflogf.c @@ -111,9 +111,9 @@ void(vflogf)(unsigned level, const char *file, int line, FILE *f, if (bufmode == _IOLBF) f->bufmode = _IOFBF; if ((fprintf_unlocked)(f, "%r%c%s%06ld:%s:%d:%.*s:%d] ", - "FEWIVDNT"[level & 7], buf32, - rem1000000int64(div1000int64(dots)), file, line, - strchrnul(prog, '.') - 
prog, prog, getpid()) <= 0) { + "FEWIVDNT"[level & 7], buf32, dots / 1000 % 1000000, + file, line, strchrnul(prog, '.') - prog, prog, + getpid()) <= 0) { vflogf_onfail(f); } (vfprintf_unlocked)(f, fmt, va); diff --git a/libc/nexgen32e/bsf.h b/libc/nexgen32e/bsf.h index 8464e0959..57d6a4a8f 100644 --- a/libc/nexgen32e/bsf.h +++ b/libc/nexgen32e/bsf.h @@ -3,20 +3,6 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ -/* - * BIT SCANNING 101 - * ctz(𝑥) 31^clz(𝑥) clz(𝑥) - * uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) - * 0x00000000 wut 32 0 wut 32 - * 0x00000001 0 0 1 0 31 - * 0x80000001 0 0 1 31 0 - * 0x80000000 31 31 32 31 0 - * 0x00000010 4 4 5 4 27 - * 0x08000010 4 4 5 27 4 - * 0x08000000 27 27 28 27 4 - * 0xffffffff 0 0 1 31 0 - */ - int bsf(int) pureconst; int bsfl(long) pureconst; int bsfll(long long) pureconst; diff --git a/libc/nexgen32e/bsr.h b/libc/nexgen32e/bsr.h index 46c479c84..7a4179bdd 100644 --- a/libc/nexgen32e/bsr.h +++ b/libc/nexgen32e/bsr.h @@ -3,20 +3,6 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ -/* - * BIT SCANNING 101 - * ctz(𝑥) 31^clz(𝑥) clz(𝑥) - * uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥) - * 0x00000000 wut 32 0 wut 32 - * 0x00000001 0 0 1 0 31 - * 0x80000001 0 0 1 31 0 - * 0x80000000 31 31 32 31 0 - * 0x00000010 4 4 5 4 27 - * 0x08000010 4 4 5 27 4 - * 0x08000000 27 27 28 27 4 - * 0xffffffff 0 0 1 31 0 - */ - int bsr(int) pureconst; int bsrl(long) pureconst; int bsrll(long long) pureconst; diff --git a/libc/nexgen32e/cachesize.h b/libc/nexgen32e/cachesize.h index 10764c235..3f028c63c 100644 --- a/libc/nexgen32e/cachesize.h +++ b/libc/nexgen32e/cachesize.h @@ -1,12 +1,13 @@ #ifndef COSMOPOLITAN_LIBC_NEXGEN32E_CACHESIZE_H_ #define COSMOPOLITAN_LIBC_NEXGEN32E_CACHESIZE_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ #define kCpuCacheTypeData 1 #define kCpuCacheTypeInstruction 2 #define kCpuCacheTypeUnified 3 +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + unsigned 
getcachesize(int, int); COSMOPOLITAN_C_END_ diff --git a/libc/nexgen32e/div1000000000int64.S b/libc/nexgen32e/div1000000000int64.S deleted file mode 100644 index b8eb3fced..000000000 --- a/libc/nexgen32e/div1000000000int64.S +++ /dev/null @@ -1,30 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Divides 64-bit signed integer by 1,000,000,000. 
-// -// @param rdi is number to divide -// @return quotient -div1000000000int64: - mov $0x1a,%cl - movabs $0x112e0be826d694b3,%rdx - jmp tinydivsi - .globl tinydivsi - .endfn div1000000000int64,globl diff --git a/libc/nexgen32e/div1000000int64.S b/libc/nexgen32e/div1000000int64.S deleted file mode 100644 index d3cf80e10..000000000 --- a/libc/nexgen32e/div1000000int64.S +++ /dev/null @@ -1,29 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Divides 64-bit signed integer by 1,000,000. 
-// -// @param rdi is number to divide -// @return quotient -div1000000int64: - mov $0x12,%cl - movabs $0x431bde82d7b634db,%rdx - jmp tinydivsi - .endfn div1000000int64,globl diff --git a/libc/nexgen32e/div10000int64.S b/libc/nexgen32e/div10000int64.S deleted file mode 100644 index d737a1fac..000000000 --- a/libc/nexgen32e/div10000int64.S +++ /dev/null @@ -1,29 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Divides 64-bit signed integer by 10,000. 
-// -// @param rdi is number to divide -// @return truncated quotient -div10000int64: - mov $11,%cl - movabs $0x346dc5d63886594b,%rdx - jmp tinydivsi - .endfn div10000int64,globl diff --git a/libc/nexgen32e/div100int64.S b/libc/nexgen32e/div100int64.S deleted file mode 100644 index 7251d963d..000000000 --- a/libc/nexgen32e/div100int64.S +++ /dev/null @@ -1,34 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Divides 64-bit signed integer by 100. 
-// -// @param rdi is number to divide -// @return rax has quotient -div100int64: - mov %rdi,%rax - movabs $-6640827866535438581,%rdx - imul %rdx - lea (%rdx,%rdi),%rax - sar $63,%rdi - sar $6,%rax - sub %rdi,%rax - ret - .endfn div100int64,globl diff --git a/libc/nexgen32e/div10int64.S b/libc/nexgen32e/div10int64.S deleted file mode 100644 index eb72913ab..000000000 --- a/libc/nexgen32e/div10int64.S +++ /dev/null @@ -1,29 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Divides 64-bit signed integer by 10. 
-// -// @param rdi is number to divide -// @return quotient -div10int64: - mov $2,%cl - movabs $0x6666666666666667,%rdx - jmp tinydivsi - .endfn div10int64,globl diff --git a/libc/nexgen32e/doc/README.txt b/libc/nexgen32e/doc/README.txt deleted file mode 100644 index e14c73c32..000000000 --- a/libc/nexgen32e/doc/README.txt +++ /dev/null @@ -1 +0,0 @@ -These files aren't intended to be compiled. diff --git a/libc/nexgen32e/kcpuids.S b/libc/nexgen32e/kcpuids.S index 3f6191e31..a696533f8 100644 --- a/libc/nexgen32e/kcpuids.S +++ b/libc/nexgen32e/kcpuids.S @@ -53,7 +53,19 @@ kCpuids:.long 0,0,0,0 # EAX=0 (Basic Processor Info) mov %rdi,%r8 xor %eax,%eax 1: xor %ecx,%ecx +#ifdef FEATURELESS +// It's been reported that GDB reverse debugging doesn't +// understand VEX encoding. The workaround is to put: +// +// CPPFLAGS = -DFEATURELESS +// +// Inside your ~/.cosmo.mk file. + xor %eax,%eax + xor %ebx,%ebx + xor %edx,%edx +#else cpuid +#endif stosl xchg %eax,%ebx stosl diff --git a/libc/nexgen32e/memeqmask.S b/libc/nexgen32e/memeqmask.S deleted file mode 100644 index 426934662..000000000 --- a/libc/nexgen32e/memeqmask.S +++ /dev/null @@ -1,48 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Creates bit mask of which bytes are the same. -// -// @param %rdi points to bit mask (write-only) -// @param %rsi points to first buffer (read-only) -// @param %rdx points to second buffer (read-only) -// @param %rcx is byte length of both %rsi and %rdx -// @return %rax is set to %rdi -// @note buffers should be 128-byte aligned -memeqmask: - .leafprologue - xor %eax,%eax - test %ecx,%ecx - jz 1f - shr $3,%ecx -0: movdqa (%rsi,%rax,8),%xmm0 - movdqa 16(%rsi,%rax,8),%xmm1 - pcmpeqb (%rdx,%rax,8),%xmm0 - pcmpeqb 16(%rdx,%rax,8),%xmm1 - pmovmskb %xmm0,%r8d - pmovmskb %xmm1,%r9d - mov %r8w,(%rdi,%rax) - mov %r9w,2(%rdi,%rax) - add $4,%eax - cmp %ecx,%eax - jb 0b -1: mov %rdi,%rax - .leafepilogue - .endfn memeqmask,globl diff --git a/libc/nexgen32e/memrchr16.S b/libc/nexgen32e/memrchr16.S deleted file mode 100644 index c54e81894..000000000 --- a/libc/nexgen32e/memrchr16.S +++ /dev/null @@ -1,62 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. 
│ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/nexgen32e/x86feature.h" -#include "libc/dce.h" -#include "libc/macros.internal.h" - -// Searches for last instance of uint16_t in memory region. -// -// @param rdi points to data to search -// @param esi is treated as uint16_t -// @param rdx is short count in rdi -// @return rax is address of last %si in %rdi, or NULL -// @note AVX2 requires Haswell (2014+) or Excavator (2015+) -memrchr16: - .leafprologue - .profilable -#if !IsTiny() - cmp $16,%rdx - jb 5f - testb X86_HAVE(AVX2)+kCpuids(%rip) - jz 5f - vmovd %esi,%xmm0 - vpbroadcastw %xmm0,%ymm0 -3: vmovdqu -32(%rdi,%rdx,2),%ymm1 - vpcmpeqw %ymm1,%ymm0,%ymm1 - vpmovmskb %ymm1,%eax - lzcnt %eax,%eax - shr %eax - mov %eax,%ecx - sub %rcx,%rdx - cmp $16,%eax - jne 5f - cmp $15,%rdx - ja 3b - vzeroupper -#endif -5: xor %eax,%eax - mov %rdx,%rcx -6: sub $1,%rcx - jb 9f - cmp %si,-2(%rdi,%rdx,2) - mov %rcx,%rdx - jne 6b - lea (%rdi,%rcx,2),%rax -9: .leafepilogue - .endfn memrchr16,globl diff --git a/libc/nexgen32e/memrchr32.S b/libc/nexgen32e/memrchr32.S deleted file mode 100644 index cd95050a4..000000000 --- a/libc/nexgen32e/memrchr32.S +++ /dev/null @@ -1,62 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine 
Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/nexgen32e/x86feature.h" -#include "libc/dce.h" -#include "libc/macros.internal.h" - -// Searches for last instance of wchar_t in memory region. 
-// -// @param rdi points to data to search -// @param esi is treated as int32_t (officially wchar_t) -// @param rdx is short count in rdi -// @return rax is address of last %esi in %rdi, or NULL -// @note AVX2 requires Haswell (2014+) or Excavator (2015+) -wmemrchr: - .leafprologue - .profilable -#if !IsTiny() - cmp $8,%rdx - jb 5f - testb X86_HAVE(AVX2)+kCpuids(%rip) - jz 5f - vmovd %esi,%xmm0 - vpbroadcastd %xmm0,%ymm0 -3: vmovdqu -32(%rdi,%rdx,4),%ymm1 - vpcmpeqd %ymm1,%ymm0,%ymm1 - vpmovmskb %ymm1,%eax - lzcnt %eax,%eax - shr $2,%eax - mov %eax,%ecx - sub %rcx,%rdx - cmp $8,%eax - jne 5f - cmp $7,%rdx - ja 3b - vzeroupper -#endif -5: xor %eax,%eax - mov %rdx,%rcx -6: sub $1,%rcx - jb 9f - cmp %esi,-4(%rdi,%rdx,4) - mov %rcx,%rdx - jne 6b - lea (%rdi,%rcx,4),%rax -9: .leafepilogue - .endfn wmemrchr,globl diff --git a/libc/nexgen32e/nexgen32e.h b/libc/nexgen32e/nexgen32e.h index af1fff796..b301332e1 100644 --- a/libc/nexgen32e/nexgen32e.h +++ b/libc/nexgen32e/nexgen32e.h @@ -10,23 +10,6 @@ void imapxlatab(void *); void insertionsort(int32_t *, size_t); void CheckStackIsAligned(void); -int64_t div10int64(int64_t) libcesque pureconst; -int64_t div100int64(int64_t) libcesque pureconst; -int64_t div1000int64(int64_t) libcesque pureconst; -int64_t div10000int64(int64_t) libcesque pureconst; -int64_t div1000000int64(int64_t) libcesque pureconst; -int64_t div1000000000int64(int64_t) libcesque pureconst; - -int64_t rem10int64(int64_t) libcesque pureconst; -int64_t rem100int64(int64_t) libcesque pureconst; -int64_t rem1000int64(int64_t) libcesque pureconst; -int64_t rem10000int64(int64_t) libcesque pureconst; -int64_t rem1000000int64(int64_t) libcesque pureconst; -int64_t rem1000000000int64(int64_t) libcesque pureconst; - -char sbb(uint64_t *, const uint64_t *, const uint64_t *, size_t); -char adc(uint64_t *, const uint64_t *, const uint64_t *, size_t); - COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ #endif /* COSMOPOLITAN_LIBC_NEXGEN32E_NEXGEN32E_H_ 
*/ diff --git a/libc/nexgen32e/rem1000000000int64.S b/libc/nexgen32e/rem1000000000int64.S deleted file mode 100644 index 0b48e6634..000000000 --- a/libc/nexgen32e/rem1000000000int64.S +++ /dev/null @@ -1,38 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Returns 𝑥 % 1,000,000,000. 
-// -// @param rdi int64 𝑥 -// @return rax has remainder -rem1000000000int64: - movabs $0x112e0be826d694b3,%rdx - mov %rdi,%rax - imul %rdx - mov %rdx,%rax - sar $0x1a,%rax - mov %rdi,%rdx - sar $0x3f,%rdx - sub %rdx,%rax - imul $0x3b9aca00,%rax,%rax - sub %rax,%rdi - mov %rdi,%rax - ret - .endfn rem1000000000int64,globl diff --git a/libc/nexgen32e/rem1000000int64.S b/libc/nexgen32e/rem1000000int64.S deleted file mode 100644 index 3d80a80b4..000000000 --- a/libc/nexgen32e/rem1000000int64.S +++ /dev/null @@ -1,38 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Returns 𝑥 % 1,000,000. 
-// -// @param rdi int64 𝑥 -// @return rax has remainder -rem1000000int64: - movabs $0x431bde82d7b634db,%rdx - mov %rdi,%rax - imul %rdx - mov %rdx,%rax - sar $0x12,%rax - mov %rdi,%rdx - sar $0x3f,%rdx - sub %rdx,%rax - imul $0xf4240,%rax,%rax - sub %rax,%rdi - mov %rdi,%rax - ret - .endfn rem1000000int64,globl diff --git a/libc/nexgen32e/rem10000int64.S b/libc/nexgen32e/rem10000int64.S deleted file mode 100644 index 3cc71b870..000000000 --- a/libc/nexgen32e/rem10000int64.S +++ /dev/null @@ -1,38 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Returns 𝑥 % 10,000. 
-// -// @param rdi int64 𝑥 -// @return rax has remainder -rem10000int64: - mov %rdi,%rax - movabsq $0x346dc5d63886594b,%rdx - imulq %rdx - mov %rdx,%rax - mov %rdi,%rdx - sar $11,%rax - sar $63,%rdx - sub %rdx,%rax - imulq $10000,%rax,%rax - sub %rax,%rdi - mov %rdi,%rax - ret - .endfn rem10000int64,globl diff --git a/libc/nexgen32e/rem1000int64.S b/libc/nexgen32e/rem1000int64.S deleted file mode 100644 index 3b94342da..000000000 --- a/libc/nexgen32e/rem1000int64.S +++ /dev/null @@ -1,38 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Returns 𝑥 % 1,000. 
-// -// @param rdi int64 𝑥 -// @return rax has remainder -rem1000int64: - movabs $0x20c49ba5e353f7cf,%rdx - mov %rdi,%rax - imul %rdx - mov %rdx,%rax - sar $0x7,%rax - mov %rdi,%rdx - sar $0x3f,%rdx - sub %rdx,%rax - imul $0x3e8,%rax,%rax - sub %rax,%rdi - mov %rdi,%rax - ret - .endfn rem1000int64,globl diff --git a/libc/nexgen32e/rem100int64.S b/libc/nexgen32e/rem100int64.S deleted file mode 100644 index d8bc78f09..000000000 --- a/libc/nexgen32e/rem100int64.S +++ /dev/null @@ -1,38 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Returns 𝑥 % 100. 
-// -// @param rdi int64 𝑥 -// @return rax has remainder -rem100int64: - mov %rdi,%rax - movabsq $-6640827866535438581,%rdx - imul %rdx - lea (%rdx,%rdi),%rax - mov %rdi,%rdx - sar $6,%rax - sar $63,%rdx - sub %rdx,%rax - imul $100,%rax,%rax - sub %rax,%rdi - mov %rdi,%rax - ret - .endfn rem100int64,globl diff --git a/libc/nexgen32e/rem10int64.S b/libc/nexgen32e/rem10int64.S deleted file mode 100644 index d980611cd..000000000 --- a/libc/nexgen32e/rem10int64.S +++ /dev/null @@ -1,39 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// Returns 𝑥 % 10. 
-// -// @param rdi int64 𝑥 -// @return rax has remainder -rem10int64: - movabs $0x6666666666666667,%rdx - mov %rdi,%rax - imul %rdx - mov %rdx,%rax - sar $0x2,%rax - mov %rdi,%rdx - sar $0x3f,%rdx - sub %rdx,%rax - lea (%rax,%rax,4),%rax - add %rax,%rax - sub %rax,%rdi - mov %rdi,%rax - ret - .endfn rem10int64,globl diff --git a/libc/nexgen32e/slowcall.S b/libc/nexgen32e/slowcall.S deleted file mode 100644 index 68e37c4f0..000000000 --- a/libc/nexgen32e/slowcall.S +++ /dev/null @@ -1,83 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" -#include "libc/notice.inc" - -// Applies no-clobber guarantee to System Five function call. -// -// - Reentrant -// - Realigns stack -// - Doesn't assume red zone -// - Clobbers nothing (except %rax and flags) -// -// This function may be called using an stdcall convention. 
It's -// useful for files named FOO.hookabi.c and BAR.ncabi.c to make -// calls into other parts of the system, that don't conform to the -// same restricted ABI. -// -// @param six args and fn addr pushed on stack in reverse order -// @return %rax has function return value, and stack is cleaned up -// @see libc/shadowargs.hook.c for intended use case -slowcall: - #param %r9 # 0x40 arg6 - #param %r8 # 0x38 arg5 - #param %rcx # 0x30 arg4 - #param %rdx # 0x28 arg3 - #param %rsi # 0x20 arg2 - #param %rdi # 0x18 arg1 - #param %rax # 0x10 call address - #param # 0x08 return address - push %rbp # 0x00 parent frame - mov %rsp,%rbp # ---- - push %rdi #-0x08 - push %rsi #-0x10 - push %rdx #-0x18 - push %rcx #-0x20 - push %r8 #-0x28 - push %r9 #-0x30 - push %r10 #-0x38 - push %r11 #-0x40 - mov 0x10(%rbp),%rax - mov 0x18(%rbp),%rdi - mov 0x20(%rbp),%rsi - mov 0x28(%rbp),%rdx - mov 0x30(%rbp),%rcx - mov 0x38(%rbp),%r8 - mov 0x40(%rbp),%r9 - and $-16,%rsp - call *%rax - push %rax - mov 0x00(%rbp),%rax - mov %rax,0x38(%rbp) - mov 0x08(%rbp),%rax - mov %rax,0x40(%rbp) - pop %rax - lea -0x40(%rbp),%rsp - pop %r11 - pop %r10 - pop %r9 - pop %r8 - pop %rcx - pop %rdx - pop %rsi - pop %rdi - lea 0x38(%rbp),%rsp - pop %rbp - ret - .endfn slowcall,globl diff --git a/libc/nexgen32e/slowcall.h b/libc/nexgen32e/slowcall.h deleted file mode 100644 index 5666f1a11..000000000 --- a/libc/nexgen32e/slowcall.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_NEXGEN32E_SLOWCALL_H_ -#define COSMOPOLITAN_LIBC_NEXGEN32E_SLOWCALL_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) - -#define slowcall(fn, arg1, arg2, arg3, arg4, arg5, arg6) \ - ({ \ - void *ax; \ - asm volatile("push\t%7\n\t" \ - "push\t%6\n\t" \ - "push\t%5\n\t" \ - "push\t%4\n\t" \ - "push\t%3\n\t" \ - "push\t%2\n\t" \ - "push\t%1\n\t" \ - "call\tslowcall" \ - : "=a"(ax) \ - : "g"(fn), "g"(arg1), "g"(arg2), "g"(arg3), "g"(arg4), \ - "g"(arg5), "g"(arg6) \ - : "memory"); \ - ax; \ - }) - -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) 
*/ -#endif /* COSMOPOLITAN_LIBC_NEXGEN32E_SLOWCALL_H_ */ diff --git a/libc/nexgen32e/strcpyzbw.S b/libc/nexgen32e/strcpyzbw.S deleted file mode 100644 index 61d01e6dd..000000000 --- a/libc/nexgen32e/strcpyzbw.S +++ /dev/null @@ -1,33 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" - -// TODO(jart): pmovzxbw and vpunpcklbw -strcpyzbw: - .leafprologue - .profilable - push %rdi - xor %eax,%eax -1: lodsb - stosw - test %al,%al - jnz 1b - pop %rax - .leafepilogue - .endfn strcpyzbw,globl diff --git a/libc/nexgen32e/strsak.S b/libc/nexgen32e/strsak.S deleted file mode 100644 index 503fb6ac9..000000000 --- a/libc/nexgen32e/strsak.S +++ /dev/null @@ -1,406 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/nexgen32e/x86feature.h" -#include "libc/nexgen32e/macros.h" -#include "libc/macros.internal.h" - -// Returns length of NUL-terminated string w/ security blankets. -// -// This is like strnlen() except it'll return 0 if (1) RDI is NULL -// or (2) a NUL-terminator wasn't found in the first RSI bytes. 
-// -// @param rdi is a nullable NUL-terminated string pointer -// @param rsi is the maximum number of bytes to consider -// @return rax is the number of bytes, excluding the NUL -strnlen_s: - .leafprologue - .profilable - xor %eax,%eax - xor %r10d,%r10d - test %rdi,%rdi - jnz 0f - .leafepilogue -0: xor %edx,%edx - mov %rdi,%r8 -// 𝑠𝑙𝑖𝑑𝑒 - .endfn strnlen_s,globl - -// Swiss army knife of string character scanning. -// Used to be fourteen fast functions in one. -// -// @param rdi is non-null string memory -// @param rsi is max number of bytes to consider -// @param dl is search character #1 -// @param dh is search character #2 -// @param r8 is subtracted from result (for length vs. pointer) -// @param r9 masks result if DH is found (for NUL vs. NULL) -// @param r10 masks result on bytes exhausted (for length v. NULL) -// @return rax end pointer after r8/r9/r10 modifications -strsak: lea -1(%rdi),%rax -1: add $1,%rax - sub $1,%rsi - jb .Lend - test $31,%al - jz .Lfast -.Lbyte: mov (%rax),%cl - cmp %cl,%dl - je .Ldone - cmp %cl,%dh - je .Lnul - jmp 1b -.Ldone: sub %r8,%rax - jmp .Lret -.Lend: mov %r10,%r9 -.Lnul: sub %r8,%rax - and %r9,%rax -.Lret: .leafepilogue -.Lslow: add $32,%rsi - jmp .Lbyte -.Lfast: movzbl %dl,%ecx - movd %ecx,%xmm0 - movzbl %dh,%ecx - movd %ecx,%xmm1 - sub $32,%rax -#if !X86_NEED(AVX2) - testb X86_HAVE(AVX2)+kCpuids(%rip) - jz .Lsse2 -#endif - vpbroadcastb %xmm0,%ymm0 - vpbroadcastb %xmm1,%ymm1 -1: add $32,%rax - sub $32,%rsi -9: jb .Lslow - vmovdqa (%rax),%ymm2 - vpcmpeqb %ymm0,%ymm2,%ymm3 - vpcmpeqb %ymm1,%ymm2,%ymm2 - vpor %ymm3,%ymm2,%ymm2 - vpmovmskb %ymm2,%ecx - bsf %ecx,%ecx - je 1b - vzeroupper -2: add %rcx,%rax - jmp .Lbyte -#if !X86_NEED(AVX2) -.Lsse2: pbroadcastb %xmm0 - pbroadcastb %xmm1 -1: add $32,%rax - sub $32,%rsi - jb 9b - movdqa (%rax),%xmm2 - movdqa 16(%rax),%xmm3 - movdqa %xmm3,%xmm4 - pcmpeqb %xmm0,%xmm3 - pcmpeqb %xmm1,%xmm4 - por %xmm4,%xmm3 - pmovmskb %xmm3,%ecx - shl $16,%ecx - movdqa %xmm2,%xmm4 - pcmpeqb %xmm0,%xmm2 
- pcmpeqb %xmm1,%xmm4 - por %xmm4,%xmm2 - pmovmskb %xmm2,%r11d - or %r11d,%ecx - bsf %ecx,%ecx - je 1b - jmp 2b -#endif - .endfn strsak,globl,hidden - -/* benchmarked on intel core i7-6700 @ 3.40GHz (skylake) - includes function call overhead (unless marked otherwise) - - your strlen, &c (strsak+avx2) for #c per n where c ≈ 0.293ns - N x1 x8 x64 mBps - ------------------------------------------------------------ - 1 47.000 36.375 35.141 99 - 1 35.000 34.625 36.234 96 - 2 31.500 18.812 18.992 184 - 3 19.667 13.042 13.182 265 - 4 30.750 10.281 10.285 339 - 7 15.857 8.946 7.551 462 - 8 12.125 9.203 7.119 490 - 15 10.467 5.475 4.601 758 - 16 6.812 5.523 4.798 727 - 31 5.387 4.327 3.517 992 - 32 4.719 1.645 1.532 2278 - 63 5.000 2.403 2.034 1715 - 64 2.047 0.779 0.788 4427 - 127 2.134 1.194 1.027 3399 - 128 1.742 0.444 0.419 8327 - 255 0.945 0.594 0.554 6295 - 256 0.574 0.271 0.264 13226 - 511 0.785 0.362 0.307 11384 - 512 0.326 0.178 0.151 23134 - 1023 0.288 0.242 0.185 18862 - 1024 0.208 0.114 0.107 32565 - 2047 0.235 0.127 0.123 28430 - 2048 0.127 0.090 0.084 41413 - 4095 0.119 0.106 0.099 35116 - 4096 0.100 0.081 0.079 44372 - 8191 0.092 0.082 0.081 43176 - 8192 0.081 0.072 0.071 49419 - 16383 0.076 0.072 0.071 48847 - 16384 0.071 0.068 0.067 52381 - 32767 0.072 0.069 0.068 51154 - 32768 0.068 0.066 0.065 53409 - - your tinystrlen() - N x1 x8 x64 mBps - ------------------------------------------------------------ - 1 53.000 33.625 33.672 97 - 1 33.000 32.125 32.234 101 - 2 24.500 19.438 17.711 184 - 3 23.667 12.875 11.911 273 - 4 13.750 9.281 9.238 352 - 7 11.000 6.125 5.801 560 - 8 7.625 5.609 5.232 621 - 15 11.800 3.825 3.364 966 - 16 4.562 3.648 3.173 1024 « optimal - 31 3.710 2.851 2.298 1414 - 32 3.031 2.254 2.159 1506 « dropoff - 63 2.683 1.827 1.691 1922 - 64 2.078 1.932 1.689 1924 - 127 1.630 1.647 1.622 2004 - 128 1.727 1.671 1.652 1968 - 255 1.392 1.450 1.435 2265 - 256 1.473 1.427 1.437 2262 - 511 1.325 1.353 1.337 2431 - 512 1.408 1.343 1.337 2431 - 1023 
1.289 1.281 1.287 2525 - 1024 1.269 1.295 1.297 2506 - 2047 1.269 1.274 1.269 2561 - 2048 1.280 1.263 1.281 2538 - 4095 1.262 1.270 1.266 2568 - 4096 1.270 1.264 1.265 2570 - 8191 1.253 1.254 1.254 2592 - 8192 1.219 1.224 1.225 2653 - 16383 1.225 1.222 1.220 2663 - 16384 1.226 1.221 1.222 2659 - 32767 1.227 1.224 1.223 2658 - 32768 1.220 1.221 1.222 2659 - - glibc strlen for #c per n where c ≈ 0.273ns - N x1 x8 x64 mBps - ------------------------------------------------------------ - 1 3497.000 53.125 42.641 82 - 1 69.000 44.875 42.547 82 - 2 45.500 24.188 21.852 160 - 3 23.000 15.625 14.557 240 - 4 22.250 11.406 10.637 328 - 7 10.143 6.768 6.230 560 - 8 11.125 5.797 5.486 636 - 15 5.800 3.142 2.859 1220 - 16 7.062 3.070 2.737 1275 - 31 2.806 1.585 1.407 2481 - 32 3.156 1.574 1.349 2587 - 63 2.016 0.895 0.691 5049 - 64 1.328 0.744 0.670 5207 - 127 1.441 0.521 0.407 8577 - 128 0.648 0.454 0.405 8619 - 255 0.553 0.286 0.214 16277 - 256 0.387 0.235 0.218 15984 - 511 0.456 0.151 0.129 27077 - 512 0.182 0.134 0.129 27117 - 1023 0.171 0.106 0.082 42795 - 1024 0.112 0.088 0.082 42741 - 2047 0.099 0.069 0.059 59537 - 2048 0.072 0.060 0.058 59925 - 4095 0.065 0.053 0.047 74122 - 4096 0.061 0.048 0.047 74478 - 8191 0.048 0.045 0.044 79117 - 8192 0.051 0.045 0.044 79181 - 16383 0.042 0.040 0.061 57018 - 16384 0.069 0.063 0.061 57245 - 32767 0.081 0.073 0.068 51426 - 32768 0.084 0.072 0.068 51285 - - GCC strlen (-Os REPNZ SCASB) for #c per n where c ≈ 0.293ns - N x1 x8 x64 mBps - ------------------------------------------------------------ - 1 103.000 84.125 88.766 37 - 1 81.000 85.125 87.328 37 - 2 43.500 44.562 45.508 71 - 3 33.000 30.208 30.995 105 - 4 24.750 23.156 23.113 141 - 7 17.000 13.054 15.355 212 - 8 13.375 14.047 13.982 232 - 15 9.533 9.258 55.111 59 - 16 6.312 6.352 6.364 511 - 31 4.032 4.141 4.141 785 - 32 3.969 4.059 4.048 803 - 63 2.937 2.970 2.995 1086 - 64 2.922 2.939 2.956 1100 - 127 2.386 2.408 2.403 1353 - 128 2.383 2.403 2.401 1354 - 255 2.129 2.118 
2.124 1530 - 256 2.137 2.133 2.130 1526 - 511 1.982 1.986 3.351 970 - 512 1.982 1.990 1.986 1637 - 1023 1.915 1.916 2.587 1257 - 1024 1.868 1.867 1.866 1742 - 2047 1.835 1.833 1.832 1775 - 2048 1.830 1.831 1.832 1775 - 4095 1.814 1.814 1.815 1791 - 4096 1.810 1.815 1.815 1791 - 8191 1.805 1.807 1.806 1800 - 8192 1.805 1.806 1.806 1800 - 16383 1.803 1.756 1.756 1851 - 16384 1.758 1.756 1.756 1851 - 32767 1.756 1.754 1.754 1853 - 32768 1.756 1.754 1.754 1853 - - Intel Optimz. Manual (SSE4.2) for #c per n where c ≈ 0.273ns - N x1 x8 x64 mBps - ------------------------------------------------------------ - 1 37.000 43.125 34.078 102 - 1 33.000 33.875 34.016 103 - 2 39.500 17.188 17.555 199 - 3 18.333 12.208 12.036 290 - 4 30.250 9.344 9.137 382 - 7 14.429 5.732 5.766 605 - 8 7.875 6.797 5.354 652 - 15 10.733 5.825 3.516 993 - 16 3.812 2.383 2.325 1501 - 31 4.097 2.609 2.079 1678 - 32 3.031 1.395 1.349 2587 - 63 2.937 1.558 1.079 3235 - 64 2.016 0.893 0.690 5056 - 127 1.929 0.721 0.607 5745 - 128 0.617 0.483 0.428 8147 - 255 1.275 0.404 0.411 8486 - 256 0.480 0.319 0.299 11681 - 511 0.479 0.307 0.288 12127 - 512 0.322 0.244 0.232 15013 - 1023 0.324 0.224 0.225 15512 - 1024 0.245 0.240 0.223 15651 - 2047 0.222 0.213 0.206 16938 - 2048 0.204 0.194 0.192 18140 - 4095 0.204 0.188 0.185 18888 - 4096 0.183 0.179 0.179 19446 - 8191 0.179 0.176 0.174 20000 - 8192 0.174 0.172 0.171 20383 - 16383 0.171 0.170 0.169 20604 - 16384 0.169 0.169 0.168 20808 - 32767 0.213 0.225 0.267 13064 - 32768 0.231 0.215 0.220 15852 - - musl libc strlen for #c per n where c ≈ 0.273ns - N x1 x8 x64 mBps - ------------------------------------------------------------ - 1 65.000 36.125 37.984 92 - 1 39.000 37.625 37.422 93 - 2 41.500 21.938 20.695 169 - 3 22.333 17.625 15.859 220 - 4 21.250 13.656 12.105 288 - 7 22.143 9.018 7.609 459 - 8 31.125 7.234 7.346 475 - 15 11.267 5.025 4.709 741 - 16 9.438 4.039 3.849 907 - 31 4.871 3.133 2.488 1402 - 32 5.219 2.246 2.039 1712 - 63 4.302 1.462 1.407 2479 - 64 
2.109 1.428 1.155 3023 - 127 1.551 1.078 0.879 3971 - 128 1.742 0.903 0.760 4591 - 255 0.922 0.558 0.605 5764 - 256 0.934 0.575 0.537 6495 - 511 0.550 0.493 0.455 7674 - 512 0.646 0.490 0.426 8183 - 1023 0.550 0.439 0.425 8203 - 1024 0.472 0.421 0.408 8549 - 2047 0.507 0.334 0.373 9360 - 2048 0.403 0.426 0.409 8540 - 4095 0.391 0.240 0.236 14799 - 4096 0.238 0.222 0.221 15766 - 8191 0.225 0.223 0.221 15779 - 8192 0.225 0.214 0.215 16250 - 16383 0.212 0.212 0.210 16595 - 16384 0.209 0.210 0.211 16535 - 32767 0.214 0.208 0.205 17001 - 32768 0.207 0.207 0.291 12002 - - newlib strlen for #c per n where c ≈ 0.273ns - N x1 x8 x64 mBps - ------------------------------------------------------------ - 1 33.000 34.625 34.141 102 - 1 33.000 34.125 33.984 103 - 2 58.500 18.562 17.508 199 - 3 16.333 12.792 12.016 290 - 4 19.250 9.219 9.215 379 - 7 17.571 6.089 5.685 614 - 8 16.625 5.078 5.432 642 - 15 8.467 4.042 3.207 1088 - 16 3.938 2.773 2.733 1277 - 31 3.645 1.673 1.598 2183 - 32 3.281 1.527 1.493 2338 - 63 2.619 1.042 0.895 3901 - 64 1.422 0.928 0.813 4294 - 127 0.984 0.718 0.561 6222 - 128 1.195 0.591 0.532 6558 - 255 0.600 0.404 0.397 8785 - 256 0.621 0.429 0.376 9280 - 511 0.346 0.311 0.306 11421 - 512 0.420 0.308 0.296 11776 - 1023 0.284 0.285 0.285 12237 - 1024 0.321 0.282 0.280 12456 - 2047 0.253 0.252 0.252 13864 - 2048 0.260 0.249 0.249 14012 - 4095 0.236 0.236 0.236 14811 - 4096 0.239 0.235 0.234 14906 - 8191 0.233 0.228 0.227 15371 - 8192 0.230 0.227 0.227 15397 - 16383 0.223 0.224 0.223 15638 - 16384 0.223 0.224 0.223 15663 - 32767 0.224 0.387 0.225 15527 - 32768 0.223 0.222 0.222 15724 - - Agner Fog's strlen (SSE2) for #c per n where c ≈ 0.273ns - N x1 x8 x64 mBps - ------------------------------------------------------------ - 1 59.000 38.375 38.453 91 - 1 37.000 38.625 38.234 91 - 2 18.500 19.062 19.273 181 - 3 13.000 12.792 12.859 271 - 4 9.250 9.594 9.660 361 - 7 5.286 5.554 5.502 634 - 8 4.625 4.703 4.791 728 - 15 2.600 2.858 2.622 1331 - 16 2.438 2.414 
2.421 1442 - 31 2.161 1.399 1.290 2706 - 32 1.219 1.262 1.250 2793 - 63 1.508 0.875 0.693 5038 - 64 0.641 0.654 0.655 5328 - 127 1.205 0.406 0.379 9200 - 128 0.367 0.372 0.369 9463 - 255 0.467 0.310 0.235 14835 - 256 0.230 0.232 0.232 15034 - 511 0.272 0.181 0.159 21918 - 512 0.174 0.161 0.158 22148 - 1023 0.175 0.134 0.120 29043 - 1024 0.140 0.122 0.120 29005 - 2047 0.128 0.114 0.112 31205 - 2048 0.130 0.113 0.112 31242 - 4095 0.105 0.098 0.097 35984 - 4096 0.105 0.098 0.097 35973 - 8191 0.093 0.090 0.090 38953 - 8192 0.094 0.090 0.090 38986 - 16383 0.088 0.086 0.086 40648 - 16384 0.088 0.086 0.086 40652 - 32767 0.088 0.086 0.085 40956 - 32768 0.087 0.085 0.085 41114 */ diff --git a/libc/nexgen32e/strstr-sse42.S b/libc/nexgen32e/strstr-sse42.S deleted file mode 100644 index 8449d5f37..000000000 --- a/libc/nexgen32e/strstr-sse42.S +++ /dev/null @@ -1,41 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" -#include "libc/nexgen32e/pcmpstr.inc" -#include "libc/nexgen32e/strstr.inc" - -// TODO(jart): Fix me. -strstr_sse42: - .leafprologue - mov %rdi,%rax - xor %ecx,%ecx -0: mov $-16,%rdx -1: add $16,%rdx - movaps (%rsi,%rdx),%xmm0 -2: add %rcx,%rax - lea (%rax,%rdx),%rdi - pcmpistri $.Lequalordered,(%rdi),%xmm0 -3: ja 2b # !CF (no match) && !ZF (need NUL-term) - jnc 4f # !CF (no match) && ZF (NUL-terminator) - jno 0b # !OF ← CF && CX!=0 (matched at offset) - jns 1b # !SF ← NUL ∉ XMM1 (need to match more) - jmp 5f # youtu.be/nVk1DjMtLWs -4: xor %eax,%eax -5: .leafepilogue - .endfn strstr_sse42,globl,hidden diff --git a/libc/nexgen32e/strstr.inc b/libc/nexgen32e/strstr.inc deleted file mode 100644 index 31ff56462..000000000 --- a/libc/nexgen32e/strstr.inc +++ /dev/null @@ -1,68 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -/* clang-format off */ - -// Searches for substring. -// -// @param rdi is NUL-terminated haystack string -// @param rsi is NUL-terminated needle string (16-byte aligned) -// @return rax is pointer to substring or NULL -// @todo 10x faster than naïve but could be 100x faster -.macro .strstr mode:req - push %rbp - mov %rsp,%rbp - .profilable - sub $32,%rsp - mov %rdi,%rax - xor %ecx,%ecx -0: mov $-16,%rdx -1: add $16,%rdx - movaps (%rsi,%rdx),%xmm0 -2: add %rcx,%rax - lea (%rax,%rdx),%rdi - test $15,%edi - jnz 6f - pcmpistri $\mode,(%rdi),%xmm0 -3: ja 2b # !CF (no match) && !ZF (need NUL-term) - jnc 4f # !CF (no match) && ZF (NUL-terminator) - jno 0b # !OF ← CF && CX!=0 (matched at offset) - jns 1b # !SF ← NUL ∉ XMM1 (need to match more) - jmp 5f # youtu.be/nVk1DjMtLWs -4: xor %eax,%eax -5: leave - ret -6: mov %rdi,%r9 # same w/ pointer realign - and $15,%r9d - mov %edi,%r8d - and $0xfff,%r8d - cmp $0xff0,%r8d - ja 8f -7: pcmpistri $\mode,(%rdi),%xmm0 - cmova %r9d,%ecx - jmp 3b -8: pcmpeqd %xmm2,%xmm2 # handle danger memory - mov %rdi,%r8 - and $-16,%r8 - movaps (%r8),%xmm1 - movaps %xmm1,-32(%rbp) - movaps %xmm2,-16(%rbp) - pcmpistri $\mode,-32(%rbp,%r9),%xmm2 - jz 4b - jmp 7b -.endm diff --git a/libc/nexgen32e/strstr16-sse42.S b/libc/nexgen32e/strstr16-sse42.S deleted file mode 100644 index 8e3a4e66a..000000000 --- a/libc/nexgen32e/strstr16-sse42.S +++ /dev/null @@ -1,26 +0,0 @@ -/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ -│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. 
│ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/macros.internal.h" -#include "libc/nexgen32e/pcmpstr.inc" -#include "libc/nexgen32e/strstr.inc" - -// TODO(jart): Fix me. -strstr16$sse42: - .strstr .Lequalorder16 - .endfn strstr16$sse42,globl,hidden diff --git a/libc/str/memchr.c b/libc/str/memchr.c index 7e736ece7..b6ed468de 100644 --- a/libc/str/memchr.c +++ b/libc/str/memchr.c @@ -70,9 +70,7 @@ noasan static inline const unsigned char *memchr_sse(const unsigned char *s, void *memchr(const void *s, int c, size_t n) { const void *r; if (!IsTiny() && X86_HAVE(SSE)) { - if (IsAsan()) { - __asan_verify(s, n); - } + if (IsAsan()) __asan_verify(s, n); r = memchr_sse(s, c, n); } else { r = memchr_pure(s, c, n); diff --git a/libc/str/memrchr.c b/libc/str/memrchr.c index e629f3e6b..34a101492 100644 --- a/libc/str/memrchr.c +++ b/libc/str/memrchr.c @@ -68,9 +68,7 @@ noasan static inline const unsigned char *memrchr_sse(const unsigned char *s, void *memrchr(const void *s, int c, size_t n) { const void *r; if (!IsTiny() && X86_HAVE(SSE)) { - if (IsAsan()) { - __asan_verify(s, n); - } + if (IsAsan()) __asan_verify(s, n); r = memrchr_sse(s, c, n); } else { r = memrchr_pure(s, c, n); diff --git a/libc/nexgen32e/doc/strcmp-avx2.c b/libc/str/memrchr16.c similarity index 53% rename from libc/nexgen32e/doc/strcmp-avx2.c rename to libc/str/memrchr16.c index 7d5c5e307..72cf96473 100644 --- 
a/libc/nexgen32e/doc/strcmp-avx2.c +++ b/libc/str/memrchr16.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,46 +16,61 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/bits.h" -#include "libc/str/internal.h" +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" +#include "libc/nexgen32e/x86feature.h" +#include "libc/str/str.h" -#define kVectorSize 32 /* x86+avx2 is 256-bit cpu */ +typedef char16_t xmm_t __attribute__((__vector_size__(16), __aligned__(2))); -typedef uint8_t uint8_v _Vector_size(kVectorSize); -typedef uint32_t vbitmask_t; +static inline const char16_t *memrchr16_pure(const char16_t *s, char16_t c, + size_t n) { + size_t i; + for (i = n; i--;) { + if (s[i] == c) { + return s + i; + } + } + return 0; +} + +noasan static inline const char16_t *memrchr16_sse(const char16_t *s, + char16_t c, size_t n) { + size_t i; + unsigned k, m; + xmm_t v, t = {c, c, c, c, c, c, c, c}; + for (i = n; i >= 8;) { + v = *(const xmm_t *)(s + (i -= 8)); + m = __builtin_ia32_pmovmskb128(v == t); + if (m) { + m = __builtin_clzl(m) ^ (sizeof(long) * CHAR_BIT - 1); + return s + i + m / 2; + } + } + while (i--) { + if (s[i] == c) { + return s + i; + } + } + return 0; +} /** - * Returns how many bytes the utf16 string would be as utf8. + * Returns pointer to first instance of character. 
+ * + * @param s is memory to search + * @param c is search byte which is masked with 65535 + * @param n is number of char16_t elements in `s` + * @return is pointer to first instance of c or NULL if not found + * @asyncsignalsafe */ -int strcmp_avx2(const char *s1, const char *s2) { - if (s1 == s2) return 0; - const unsigned char *p1 = (const unsigned char *)s1; - const unsigned char *p2 = (const unsigned char *)s2; - size_t i = -kVectorSize; -vLoop: - i += kVectorSize; -bLoop: - if (!IsPointerDangerous(p1 + i) && !IsPointerDangerous(p2 + i)) { - unsigned char zf; - vbitmask_t r1; - uint8_v v1, v2; - const uint8_v kZero = {0}; - asm(ZFLAG_ASM("vmovdqu\t%5,%2\n\t" /* move because gcc problematic */ - "vpcmpeqb\t%4,%2,%1\n\t" /* check for equality in p1 and p2 */ - "vpcmpeqb\t%6,%2,%2\n\t" /* check for nul in p1 */ - "vpandn\t%7,%1,%2\n\t" /* most complicated bitwise not ever */ - "vpor\t%2,%1,%1\n\t" /* check for nul in p2 */ - "pmovmskb\t%1,%3\n\t" /* turn 256 bits into 32 bits */ - "bsf\t%3,%3") /* find stop byte */ - : ZFLAG_CONSTRAINT(zf), "=x"(v1), "=x"(v2), "=r"(r1) - : "m"(*(const uint8_v *)(p1 + i)), "m"(*(const uint8_v *)(p2 + i)), - "x"(kZero), "m"(kVectorSize)); - if (zf) goto vLoop; - return p1[i + r1] - p2[i + r1]; +void *memrchr16(const void *s, int c, size_t n) { + const void *r; + if (!IsTiny() && X86_HAVE(SSE)) { + if (IsAsan()) __asan_verify(s, n * 2); + r = memrchr16_sse(s, c, n); } else { - i += 1; - int c; - if (!(c = p1[i - 1] - p2[i - 1]) && p1[i - 1] + p1[i - 1] != 0) goto bLoop; - return c; + r = memrchr16_pure(s, c, n); } + return (void *)r; } diff --git a/libc/str/str.h b/libc/str/str.h index 45b708fb3..b40970ea6 100644 --- a/libc/str/str.h +++ b/libc/str/str.h @@ -88,7 +88,6 @@ void *memmove(void *, const void *, size_t) memcpyesque; void *memcpy(void *restrict, const void *restrict, size_t) memcpyesque; void *mempcpy(void *restrict, const void *restrict, size_t) memcpyesque; void *memccpy(void *restrict, const void *restrict, int, 
size_t) memcpyesque; -void *memeqmask(void *, const void *, const void *, size_t) memcpyesque; void bcopy(const void *, void *, size_t) memcpyesque; void explicit_bzero(void *, size_t); @@ -173,7 +172,6 @@ wchar_t *wcsncat(wchar_t *, const wchar_t *, size_t) memcpyesque; char *strncpy(char *, const char *, size_t) memcpyesque; char *strtok(char *, const char *) paramsnonnull((2)) libcesque; char *strtok_r(char *, const char *, char **) paramsnonnull((2, 3)); -uint16_t *strcpyzbw(uint16_t *, const char *) memcpyesque; wchar_t *wcstok(wchar_t *, const wchar_t *, wchar_t **) paramsnonnull((2, 3)); char *wstrtrunc(uint16_t *) memcpyesque; char *wstrntrunc(uint16_t *, size_t) memcpyesque; diff --git a/libc/str/strcmp.c b/libc/str/strcmp.c index 48743cb1a..a13789931 100644 --- a/libc/str/strcmp.c +++ b/libc/str/strcmp.c @@ -20,10 +20,7 @@ #include "libc/str/str.h" static inline noasan uint64_t UncheckedAlignedRead64(const char *p) { - return (uint64_t)(255 & p[7]) << 070 | (uint64_t)(255 & p[6]) << 060 | - (uint64_t)(255 & p[5]) << 050 | (uint64_t)(255 & p[4]) << 040 | - (uint64_t)(255 & p[3]) << 030 | (uint64_t)(255 & p[2]) << 020 | - (uint64_t)(255 & p[1]) << 010 | (uint64_t)(255 & p[0]) << 000; + return *(uint64_t *)p; } /** diff --git a/libc/str/strnlen.c b/libc/str/strnlen.c index f07286a36..5eb82cba3 100644 --- a/libc/str/strnlen.c +++ b/libc/str/strnlen.c @@ -18,12 +18,14 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" #include "libc/bits/bits.h" +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" #include "libc/str/str.h" static noasan size_t strnlen_x64(const char *s, size_t n, size_t i) { uint64_t w; for (; i + 8 < n; i += 8) { - w = READ64LE(s + i); + w = *(uint64_t *)(s + i); if ((w = ~w & (w - 0x0101010101010101) & 0x8080808080808080)) { i += (unsigned)__builtin_ctzll(w) >> 3; break; @@ -40,8 +42,9 @@ static noasan size_t strnlen_x64(const char *s, size_t n, size_t i) { * @return byte 
length * @asyncsignalsafe */ -size_t strnlen(const char *s, size_t n) { +noasan size_t strnlen(const char *s, size_t n) { size_t i; + if (IsAsan() && n) __asan_verify(s, 1); for (i = 0; (uintptr_t)(s + i) & 7; ++i) { if (i == n || !s[i]) return i; } @@ -50,5 +53,6 @@ size_t strnlen(const char *s, size_t n) { if (i == n || !s[i]) break; } assert(i == n || (i < n && !s[i])); + if (IsAsan()) __asan_verify(s, i); return i; } diff --git a/test/libc/nexgen32e/sidiv_test.c b/libc/str/strnlen_s.c similarity index 59% rename from test/libc/nexgen32e/sidiv_test.c rename to libc/str/strnlen_s.c index 60008c029..7c6351d8a 100644 --- a/test/libc/nexgen32e/sidiv_test.c +++ b/libc/str/strnlen_s.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,32 +16,46 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/nexgen32e/nexgen32e.h" -#include "libc/testlib/testlib.h" +#include "libc/assert.h" +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" +#include "libc/str/str.h" -TEST(sidiv, smoke) { - EXPECT_EQ(13373133731337 / 10, div10int64(13373133731337)); - EXPECT_EQ(13373133731337 / 100, div100int64(13373133731337)); - EXPECT_EQ(13373133731337 / 1000, div1000int64(13373133731337)); - EXPECT_EQ(13373133731337 / 10000, div10000int64(13373133731337)); - EXPECT_EQ(13373133731337 / 1000000, div1000000int64(13373133731337)); - EXPECT_EQ(13373133731337 / 1000000000, div1000000000int64(13373133731337)); +static noasan size_t strnlen_s_x64(const char *s, size_t n, size_t i) { + uint64_t w; + for (; i + 8 < n; i += 8) { + w = *(uint64_t *)(s + i); + if ((w = ~w & (w - 0x0101010101010101) & 0x8080808080808080)) { + i += (unsigned)__builtin_ctzll(w) >> 3; + break; + } + } + return i; } -TEST(sirem, smoke) { - EXPECT_EQ(13373133731337 % 10, rem10int64(13373133731337)); - EXPECT_EQ(13373133731337 % 100, rem100int64(13373133731337)); - EXPECT_EQ(13373133731337 % 1000, rem1000int64(13373133731337)); - EXPECT_EQ(13373133731337 % 10000, rem10000int64(13373133731337)); - EXPECT_EQ(13373133731337 % 1000000, rem1000000int64(13373133731337)); - EXPECT_EQ(13373133731337 % 1000000000, rem1000000000int64(13373133731337)); -} - -TEST(rem, euclid) { - ASSERT_EQ(-2, rem10int64(-12)); - ASSERT_EQ(-1, rem10int64(-1)); - ASSERT_EQ(0, rem10int64(0)); - ASSERT_EQ(1, rem10int64(1)); - ASSERT_EQ(9, rem10int64(9)); - ASSERT_EQ(1, rem10int64(11)); +/** + * Returns length of NUL-terminated string... securely. + * + * This is like strnlen() except it'll return 0 if `s` is null. We also + * make the assumption for the purposes of ASAN that `n` is the size of + * the buffer if `s` is non-null. 
+ * + * @param s is string + * @param n is max length + * @return byte length + * @asyncsignalsafe + */ +noasan size_t strnlen_s(const char *s, size_t n) { + size_t i; + if (!s) return 0; + if (IsAsan()) __asan_verify(s, n); + for (i = 0; (uintptr_t)(s + i) & 7; ++i) { + if (i == n || !s[i]) return i; + } + i = strnlen_s_x64(s, n, i); + for (;; ++i) { + if (i == n || !s[i]) break; + } + assert(i == n || (i < n && !s[i])); + return i; } diff --git a/libc/str/wmemrchr.c b/libc/str/wmemrchr.c new file mode 100644 index 000000000..af45f8c57 --- /dev/null +++ b/libc/str/wmemrchr.c @@ -0,0 +1,76 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2021 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" +#include "libc/nexgen32e/x86feature.h" +#include "libc/str/str.h" + +typedef wchar_t xmm_t __attribute__((__vector_size__(16), __aligned__(4))); + +static inline const wchar_t *wmemrchr_pure(const wchar_t *s, wchar_t c, + size_t n) { + size_t i; + for (i = n; i--;) { + if (s[i] == c) { + return s + i; + } + } + return 0; +} + +noasan static inline const wchar_t *wmemrchr_sse(const wchar_t *s, wchar_t c, + size_t n) { + size_t i; + unsigned k, m; + xmm_t v, t = {c, c, c, c}; + for (i = n; i >= 4;) { + v = *(const xmm_t *)(s + (i -= 4)); + m = __builtin_ia32_pmovmskb128(v == t); + if (m) { + m = __builtin_clzl(m) ^ (sizeof(long) * CHAR_BIT - 1); + return s + i + m / 4; + } + } + while (i--) { + if (s[i] == c) { + return s + i; + } + } + return 0; +} + +/** + * Returns pointer to first instance of character. + * + * @param s is memory to search + * @param c is search word + * @param n is number of wchar_t elements in `s` + * @return is pointer to first instance of c or NULL if not found + * @asyncsignalsafe + */ +void *wmemrchr(const void *s, wchar_t c, size_t n) { + const void *r; + if (!IsTiny() && X86_HAVE(SSE)) { + if (IsAsan()) __asan_verify(s, n * 4); + r = wmemrchr_sse(s, c, n); + } else { + r = wmemrchr_pure(s, c, n); + } + return (void *)r; +} diff --git a/libc/thread/create.c b/libc/thread/create.c index 00513dc35..b01b1c7e2 100644 --- a/libc/thread/create.c +++ b/libc/thread/create.c @@ -29,6 +29,7 @@ #include "libc/sysv/consts/clone.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/prot.h" +#include "libc/thread/internal.h" #include "libc/thread/thread.h" STATIC_YOINK("_main_thread_ctor"); @@ -77,6 +78,7 @@ static int cthread_start(void *arg) { exitcode = (void *)rc.dx; } td->exitcode = exitcode; + _pthread_key_destruct(td->key); if (atomic_load(&td->state) & cthread_detached) { // we're still 
using the stack // thus we can't munmap it yet diff --git a/libc/thread/join.c b/libc/thread/join.c index 05dedb134..9f8c55d22 100644 --- a/libc/thread/join.c +++ b/libc/thread/join.c @@ -54,7 +54,6 @@ int cthread_join(cthread_t td, void **exitcode) { } else { if (~atomic_fetch_add(&td->state, cthread_joining) & cthread_finished) { while ((x = atomic_load(&td->tid))) { - // FUTEX_WAIT_PRIVATE makes it hang cthread_memory_wait32(&td->tid, x, 0); } } diff --git a/libc/thread/pthread_key_destruct.c b/libc/thread/pthread_key_destruct.c index 3c5a913fe..2cfc06161 100644 --- a/libc/thread/pthread_key_destruct.c +++ b/libc/thread/pthread_key_destruct.c @@ -30,7 +30,7 @@ StartOver: x = _pthread_key_usage[i]; while (x) { j = bsrl(x); - if ((dtor = _pthread_key_dtor[i * 64 + j]) && (value = key[i * 64 + j])) { + if ((value = key[i * 64 + j]) && (dtor = _pthread_key_dtor[i * 64 + j])) { key[i * 64 + j] = 0; dtor(value); goto StartOver; diff --git a/libc/thread/wait.c b/libc/thread/wait.c index 04b5335fa..567839695 100644 --- a/libc/thread/wait.c +++ b/libc/thread/wait.c @@ -18,53 +18,25 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/atomic.h" #include "libc/calls/calls.h" +#include "libc/calls/struct/timespec.h" #include "libc/dce.h" -#include "libc/sysv/consts/clock.h" -#include "libc/sysv/consts/futex.h" -#include "libc/thread/freebsd.internal.h" +#include "libc/errno.h" +#include "libc/intrin/futex.internal.h" #include "libc/thread/thread.h" int cthread_memory_wait32(int* addr, int val, const struct timespec* timeout) { size_t size; - struct _umtx_time *put, ut; if (IsLinux() || IsOpenbsd()) { - return sys_futex(addr, FUTEX_WAIT, val, timeout, 0); - -#if 0 - } else if (IsFreebsd()) { - if (!timeout) { - put = 0; - size = 0; - } else { - ut._flags = 0; - ut._clockid = CLOCK_REALTIME; - ut._timeout = *timeout; - put = &ut; - size = sizeof(ut); - } - return _umtx_op(addr, UMTX_OP_MUTEX_WAIT, 0, &size, put); -#endif - + 
return _futex_wait(addr, val, timeout); } else { - unsigned tries; - for (tries = 1; atomic_load(addr) == val; ++tries) { - if (tries & 7) { - __builtin_ia32_pause(); - } else { - sched_yield(); - } - } - return 0; + return sched_yield(); } } int cthread_memory_wake32(int* addr, int n) { if (IsLinux() || IsOpenbsd()) { - return sys_futex(addr, FUTEX_WAKE, n, 0, 0); -#if 0 - } else if (IsFreebsd()) { - return _umtx_op(addr, UMTX_OP_MUTEX_WAKE, n, 0, 0); -#endif + return _futex_wake(addr, n); + } else { + return 0; } - return -1; } diff --git a/libc/thread/zombie.c b/libc/thread/zombie.c index 11c2174df..8df6491fd 100644 --- a/libc/thread/zombie.c +++ b/libc/thread/zombie.c @@ -41,6 +41,7 @@ void cthread_zombies_add(cthread_t td) { void cthread_zombies_reap(void) { struct Zombie *z; + // TODO(jart): Is this right? Update to not use malloc/free? while ((z = atomic_load(&cthread_zombies)) && !atomic_load(&z->td->tid)) { if (atomic_compare_exchange_weak(&cthread_zombies, &z, z->next)) { munmap(z->td->alloc.bottom, z->td->alloc.top - z->td->alloc.bottom); diff --git a/libc/time/dsleep.c b/libc/time/dsleep.c index f7c5d094c..c261ffc2e 100644 --- a/libc/time/dsleep.c +++ b/libc/time/dsleep.c @@ -28,7 +28,7 @@ long double dsleep(long double secs) { struct timespec dur, rem; dur.tv_sec = secs; dur.tv_nsec = secs * 1e9; - dur.tv_nsec = rem1000000000int64(dur.tv_nsec); + dur.tv_nsec = dur.tv_nsec % 1000000000; if (secs > 1e-6) { nanosleep(&dur, &rem); secs = rem.tv_nsec; diff --git a/test/libc/calls/seccomp_test.c b/test/libc/calls/seccomp_test.c index 3ca8f0566..d18a4e96b 100644 --- a/test/libc/calls/seccomp_test.c +++ b/test/libc/calls/seccomp_test.c @@ -33,8 +33,23 @@ #include "libc/testlib/testlib.h" #include "tool/net/sandbox.h" +// It's been reported that Chromebooks return EINVAL here. 
+bool CanUseSeccomp(void) { + int ws, pid; + ASSERT_NE(-1, (pid = fork())); + if (!pid) { + if (seccomp(SECCOMP_SET_MODE_STRICT, 0, 0) != -1) { + _Exit1(0); + } else { + _Exit1(1); + } + } + EXPECT_NE(-1, wait(&ws)); + return WIFEXITED(ws) && !WEXITSTATUS(ws); +} + void SetUp(void) { - if (!__is_linux_2_6_23()) { + if (!__is_linux_2_6_23() || !CanUseSeccomp()) { exit(0); } } diff --git a/test/libc/intrin/pthread_mutex_lock_test.c b/test/libc/intrin/pthread_mutex_lock_test.c index 455c6f1f2..76fb4178a 100644 --- a/test/libc/intrin/pthread_mutex_lock_test.c +++ b/test/libc/intrin/pthread_mutex_lock_test.c @@ -131,7 +131,7 @@ TEST(pthread_mutex_lock, contention) { for (i = 0; i < THREADS; ++i) { munmap(stack[i], GetStackSize()); } - pthread_mutex_destroy(&lock); + EXPECT_EQ(0, pthread_mutex_destroy(&lock)); } TEST(pthread_mutex_lock, rcontention) { @@ -159,7 +159,7 @@ TEST(pthread_mutex_lock, rcontention) { for (i = 0; i < THREADS; ++i) { munmap(stack[i], GetStackSize()); } - pthread_mutex_destroy(&lock); + EXPECT_EQ(0, pthread_mutex_destroy(&lock)); } TEST(pthread_mutex_lock, econtention) { @@ -187,7 +187,7 @@ TEST(pthread_mutex_lock, econtention) { for (i = 0; i < THREADS; ++i) { munmap(stack[i], GetStackSize()); } - pthread_mutex_destroy(&lock); + EXPECT_EQ(0, pthread_mutex_destroy(&lock)); } int SpinlockWorker(void *p) { diff --git a/test/libc/str/strlen_test.c b/test/libc/intrin/strlen_test.c similarity index 98% rename from test/libc/str/strlen_test.c rename to test/libc/intrin/strlen_test.c index d77a2c77a..403218c9c 100644 --- a/test/libc/str/strlen_test.c +++ b/test/libc/intrin/strlen_test.c @@ -106,9 +106,9 @@ TEST(strnlen, nulNotFound_ReturnsSize) { } } -TEST(strnlen_s, nulNotFound_ReturnsZero) { +TEST(strnlen_s, nulNotFound) { char buf[3] = {1, 2, 3}; - ASSERT_EQ(0, strnlen_s(buf, 3)); + ASSERT_EQ(3, strnlen_s(buf, 3)); } TEST(strlen, fuzz) { diff --git a/test/libc/nexgen32e/memeqmask_test.c b/test/libc/nexgen32e/memeqmask_test.c deleted file mode 100644 
index dfedfc68d..000000000 --- a/test/libc/nexgen32e/memeqmask_test.c +++ /dev/null @@ -1,101 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/bits.h" -#include "libc/runtime/buffer.h" -#include "libc/runtime/gc.internal.h" -#include "libc/stdio/stdio.h" -#include "libc/str/str.h" -#include "libc/testlib/testlib.h" -#include "libc/x/x.h" - -#define ALIGN 128 -#define BUFSIZE (8 * 32) -#define MASKSIZE (BUFSIZE / CHAR_BIT) - -const char kX[] = "aaaaaaaaeeeeeeeeeeeeeeeeeeeeeeee" - "e e" - "e e" - "e e" - "e e" - "e e" - "e e" - "eeeeeeeeeeeeeeeeeeeeeeeeeeeeee-e"; - -const char kY[] = "aaaaaaaaefffffffffffeffffffffff-" - "f z-" - "f f" - "f f" - "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" - "f f" - "f f" - "ffffffffffffffffffffffffffffff-f"; - -const char kM[] = "11111111100000000000100000000000" - "01111111111111111111111111111100" - "01111111111111111111111111111110" - "01111111111111111111111111111110" - "00000000000000000000000000000000" - "01111111111111111111111111111110" - "01111111111111111111111111111110" - "00000000000000000000000000000010"; - -dontdiscard char *binify(uint8_t *data, size_t size) { - uint8_t b; - size_t i, j; - char *s, *p; - p = s = xmalloc(size * CHAR_BIT + 1); - for (i = 0; i < size; ++i) { - b = data[i]; - for (j = 0; j < CHAR_BIT; ++j) { - *p++ = "01"[b & 1]; - b >>= 1; - } - } - *p = '\0'; - return s; -} - -TEST(memeqmask, test) { - struct GuardedBuffer x = {}, y = {}, m = {}; - memcpy(balloc(&x, ALIGN, BUFSIZE), kX, BUFSIZE); - memcpy(balloc(&y, ALIGN, BUFSIZE), kY, BUFSIZE); - balloc(&m, ALIGN, MASKSIZE); - EXPECT_EQ((intptr_t)m.p, (intptr_t)memeqmask(m.p, x.p, y.p, BUFSIZE)); - EXPECT_STREQ(kM, gc(binify(m.p, MASKSIZE))); - bfree(&m); - bfree(&x); - bfree(&y); -} - -#if 0 -#include "libc/rand/rand.h" -#include "libc/testlib/ezbench.h" -TEST(memeqmask, bench) { - size_t len = 64 * 1024; - char *m = xmemalign(64, DIMMASK(len)); - char *x = xmemalign(64, len); - char *y = xmemalign(64, len); - EZBENCH( - { - rngset(x, len, rand64, -1); - rngset(y, len, rand64, -1); - }, - 
memeqmask(m, x, y, len)); -} -#endif diff --git a/test/libc/nexgen32e/memrchr16_test.c b/test/libc/str/memrchr16_test.c similarity index 94% rename from test/libc/nexgen32e/memrchr16_test.c rename to test/libc/str/memrchr16_test.c index c6bd7edb2..f18b8873f 100644 --- a/test/libc/nexgen32e/memrchr16_test.c +++ b/test/libc/str/memrchr16_test.c @@ -19,6 +19,7 @@ #include "libc/nexgen32e/nexgen32e.h" #include "libc/str/str.h" #include "libc/testlib/ezbench.h" +#include "libc/testlib/hyperion.h" #include "libc/testlib/testlib.h" TEST(memrchr16, test) { @@ -31,4 +32,6 @@ TEST(memrchr16, test) { BENCH(memrchr16, bench) { EZBENCH2("memrchr16", donothing, EXPROPRIATE(memrchr16(u"yo.hi.there", '.', 11))); + EZBENCH2("memrchr16 hyperion", donothing, + EXPROPRIATE(memrchr16(kHyperion, '.', kHyperionSize / 2))); } diff --git a/test/libc/str/strnlen_test.c b/test/libc/str/strnlen_test.c new file mode 100644 index 000000000..e69de29bb diff --git a/libc/nexgen32e/doc/cescapec.c b/test/libc/str/wmemrchr_test.c similarity index 66% rename from libc/nexgen32e/doc/cescapec.c rename to test/libc/str/wmemrchr_test.c index 2d94fcb2c..d0bcc6250 100644 --- a/libc/nexgen32e/doc/cescapec.c +++ b/test/libc/str/wmemrchr_test.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,40 +16,21 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/str/internal.h" +#include "libc/str/str.h" +#include "libc/testlib/ezbench.h" +#include "libc/testlib/hyperion.h" +#include "libc/testlib/testlib.h" -unsigned cescapec(int c) { - unsigned char ch = c; - switch (ch) { - case '\a': - return '\\' | 'a' << 8; - case '\b': - return '\\' | 'b' << 8; - case '\v': - return '\\' | 'v' << 8; - case '\f': - return '\\' | 'f' << 8; - case '\?': - return '\\' | '?' << 8; - case '\n': - return '\\' | 'n' << 8; - case '\r': - return '\\' | 'r' << 8; - case '\t': - return '\\' | 't' << 8; - case '\"': - return '\\' | '"' << 8; - case '\'': - return '\\' | '\'' << 8; - case '\\': - return '\\' | '\\' << 8; - default: { - if (ch >= 0x80 || !isprint(ch)) { - return '\\' | (ch / 64 + '0') << 8 | (ch % 64 / 8 + '0') << 16 | - (ch % 8 + '0') << 24; - } else { - return ch; - } - } - } +TEST(wmemrchr, test) { + EXPECT_EQ(NULL, wmemrchr(L"yo.hi.thereeuhcruhrceeuhcre", '-', 27)); + EXPECT_STREQ(L".there", wmemrchr(L"yo.hi.there", '.', 11)); + EXPECT_STREQ(L".thereeuhcruhrceeuhcre", + wmemrchr(L"yo.hi.thereeuhcruhrceeuhcre", '.', 27)); +} + +BENCH(wmemrchr, bench) { + EZBENCH2("wmemrchr", donothing, + EXPROPRIATE(wmemrchr(L"yo.hi.there", '.', 11))); + EZBENCH2("wmemrchr hyperion", donothing, + EXPROPRIATE(wmemrchr(kHyperion, '.', kHyperionSize / 4))); } diff --git a/third_party/stb/stb_image_write.c b/third_party/stb/stb_image_write.c index 771ce6143..138489f47 100644 --- a/third_party/stb/stb_image_write.c +++ b/third_party/stb/stb_image_write.c @@ -723,12 +723,14 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, quality = quality < 50 ? 5000 / quality : 200 - quality * 2; for (i = 0; i < 64; ++i) { - int uvti, yti = div100int64((YQT[i] * quality + 50)); - YTable[stbiw__jpg_ZigZag[i]] = - (unsigned char)(yti < 1 ? 1 : yti > 255 ? 
255 : yti); - uvti = div100int64(UVQT[i] * quality + 50); - UVTable[stbiw__jpg_ZigZag[i]] = - (unsigned char)(uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); + int uvti, yti = (YQT[i] * quality + 50) / 100; + YTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(yti < 1 ? 1 + : yti > 255 ? 255 + : yti); + uvti = (UVQT[i] * quality + 50) / 100; + UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(uvti < 1 ? 1 + : uvti > 255 ? 255 + : uvti); } for (row = 0, k = 0; row < 8; ++row) {