Perform some code cleanup

This commit is contained in:
Justine Tunney 2022-06-23 10:21:07 -07:00
parent 0dd9629562
commit a4601a24d3
63 changed files with 350 additions and 1643 deletions

View file

@ -24,7 +24,7 @@
/**
 * Emulates nanosleep() on XNU using a select() call that carries
 * nothing but a timeout.
 *
 * The nanosecond field is truncated to whole microseconds, since that
 * is the unit of the timeval handed to select(), and is then raised to
 * at least one microsecond.
 *
 * @param req is the requested sleep duration
 * @param rem is accepted for signature compatibility; it is not written
 * @return whatever sys_select() returns
 */
int sys_nanosleep_xnu(const struct timespec *req, struct timespec *rem) {
  long micros;
  // nanoseconds -> microseconds; the earlier div1000int64() store was
  // immediately overwritten by this division, so it has been dropped
  micros = req->tv_nsec / 1000;
  // never pass a zero microsecond field
  micros = MAX(1, micros);
  return sys_select(0, 0, 0, 0, &(struct timeval){req->tv_sec, micros});
}

View file

@ -41,19 +41,19 @@ int sys_utimensat_xnu(int dirfd, const char *path, const struct timespec ts[2],
tv[0] = now;
} else if (ts[0].tv_nsec == UTIME_OMIT) {
tv[0].tv_sec = st.st_atim.tv_sec;
tv[0].tv_usec = div1000int64(st.st_atim.tv_nsec);
tv[0].tv_usec = st.st_atim.tv_nsec / 1000;
} else {
tv[0].tv_sec = ts[0].tv_sec;
tv[0].tv_usec = div1000int64(ts[0].tv_nsec);
tv[0].tv_usec = ts[0].tv_nsec / 1000;
}
if (ts[1].tv_nsec == UTIME_NOW) {
tv[1] = now;
} else if (ts[1].tv_nsec == UTIME_OMIT) {
tv[1].tv_sec = st.st_mtim.tv_sec;
tv[1].tv_usec = div1000int64(st.st_mtim.tv_nsec);
tv[1].tv_usec = st.st_mtim.tv_nsec / 1000;
} else {
tv[1].tv_sec = ts[1].tv_sec;
tv[1].tv_usec = div1000int64(ts[1].tv_nsec);
tv[1].tv_usec = ts[1].tv_nsec / 1000;
}
} else {
tv[0] = now;

View file

@ -0,0 +1,12 @@
#ifndef COSMOPOLITAN_LIBC_INTRIN_FUTEX_INTERNAL_H_
#define COSMOPOLITAN_LIBC_INTRIN_FUTEX_INTERNAL_H_
#include "libc/calls/struct/timespec.h"
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/* Internal wrappers that issue the futex system call directly. */
/* Issues FUTEX_WAIT with (address, expected value, timeout). */
int _futex_wait(void *, int, struct timespec *);
/* Issues FUTEX_WAKE with (address, count). */
int _futex_wake(void *, int);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_INTRIN_FUTEX_INTERNAL_H_ */

View file

@ -1,7 +1,7 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
@ -16,14 +16,29 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
#include "libc/bits/asmflag.h"
#include "libc/calls/strace.internal.h"
#include "libc/calls/struct/timespec.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/futex.internal.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/futex.h"
#include "libc/sysv/consts/nr.h"
// Divides 64-bit signed integer by 1,000.
//
// Only loads two constants and then jumps to tinydivsi, which is
// defined elsewhere and produces the result.
//
// @param rdi is number to divide
// @return quotient
div1000int64:
// NOTE(review): %cl and %rdx look like the shift count and the
// multiplier consumed by tinydivsi (rem1000int64 uses this same
// constant with imul and `sar $0x7`) — confirm against tinydivsi
mov $0x7,%cl
movabs $0x20c49ba5e353f7cf,%rdx
// jmp rather than call: no return address is pushed here
jmp tinydivsi
.endfn div1000int64,globl
/**
 * Issues the futex system call with the FUTEX_WAIT operation and logs
 * the outcome through STRACE().
 *
 * @param addr is the futex word, passed in %rdi
 * @param expect is the value passed in %rdx
 * @param timeout is copied into %r10 before the syscall instruction
 * @return value the kernel left in %eax, negated if the carry flag
 *     was set when the syscall returned
 */
privileged int _futex_wait(void *addr, int expect, struct timespec *timeout) {
int ax;
bool cf;
char buf[45];
// %6 is the timeout operand, assuming CFLAG_CONSTRAINT(cf) contributes
// exactly one output operand. The carry flag is cleared up front so
// that `cf` only reflects what happened during the syscall.
asm volatile(CFLAG_ASM("mov\t%6,%%r10\n\t"
"clc\n\t"
"syscall")
: CFLAG_CONSTRAINT(cf), "=a"(ax)
: "1"(__NR_futex), "D"(addr), "S"(FUTEX_WAIT), "d"(expect),
"g"(timeout)
: "rcx", "r10", "r11", "memory");
// NOTE(review): presumably normalizes carry-flag style error reporting
// to a negative result — confirm which platforms set carry
if (cf) ax = -ax;
// a zero result is logged as "0", anything else via strerrno(-ax)
STRACE("futex(%p, FUTEX_WAIT, %d, %s) → %s", addr, expect,
DescribeTimespec(buf, sizeof(buf), 0, timeout),
ax ? strerrno(-ax) : "0");
return ax;
}

View file

@ -1,7 +1,7 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
@ -16,39 +16,34 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/asmflag.h"
#include "libc/calls/strace.internal.h"
#include "libc/fmt/itoa.h"
#include "libc/intrin/futex.internal.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/futex.h"
#include "libc/sysv/consts/nr.h"
#define N 32
typedef uint8_t uint8_v _Vector_size(N);
// Renders a futex wake result for logging: a negative value is turned
// into strerrno(-ax), anything else is formatted into buf as a decimal
// integer and buf is returned.
static const char *FormatFutexWakeResult(char buf[12], int ax) {
  if (ax < 0) return strerrno(-ax);
  FormatInt32(buf, ax);
  return buf;
}
/**
* Searches for last instance of character in memory region.
*
* @param s is binary data to search
* @param c is treated as unsigned char
* @param n is byte length of s
* @return address of last c in s, or NULL if not found
*/
void *memrchr(const void *s, int c, size_t n) {
unsigned char ch = (unsigned char)c;
const unsigned char *p = (const unsigned char *)s;
if (n >= 32 && CheckAvx2()) {
uint8_v cv;
__builtin_memset(&cv, ch, sizeof(cv));
do {
uint32_t skip;
uint8_v sv, tv;
memcpy(&sv, s + n - N, N);
asm("vpcmpeqb\t%2,%3,%1\n\t"
"vpmovmskb\t%1,%0\n\t"
"lzcnt\t%0,%0"
: "=r"(skip), "=x"(tv)
: "x"(sv), "x"(cv));
n -= skip;
if (skip != 32) break;
} while (n >= 32);
}
while (n--) {
if (p[n] == ch) return (/* unconst */ void *)&p[n];
}
return NULL;
/**
 * Issues the futex system call with the FUTEX_WAKE operation and logs
 * the outcome through STRACE().
 *
 * @param addr is the futex word, passed in %rdi
 * @param count is the value passed in %rdx
 * @return value the kernel left in %eax, negated if the carry flag
 *     was set when the syscall returned
 */
privileged int _futex_wake(void *addr, int count) {
int ax;
bool cf;
char buf[12];
// carry is cleared first so `cf` only reflects the syscall itself
asm volatile(CFLAG_ASM("clc\n\t"
"syscall")
: CFLAG_CONSTRAINT(cf), "=a"(ax)
: "1"(__NR_futex), "D"(addr), "S"(FUTEX_WAKE), "d"(count)
: "rcx", "r11", "memory");
// NOTE(review): presumably normalizes carry-flag style error reporting
// to a negative result — confirm which platforms set carry
if (cf) ax = -ax;
// non-negative results are logged as a number, negative ones by name
STRACE("futex(%p, FUTEX_WAKE, %d) → %s", addr, count,
FormatFutexWakeResult(buf, ax));
return ax;
}

View file

@ -73,6 +73,8 @@ o/$(MODE)/libc/intrin/kprintf.greg.o: \
-fno-stack-protector
# synchronization primitives are intended to be magic free
o/$(MODE)/libc/intrin/futex_wait.o \
o/$(MODE)/libc/intrin/futex_wake.o \
o/$(MODE)/libc/intrin/gettid.greg.o \
o/$(MODE)/libc/intrin/pthread_mutex_lock.o \
o/$(MODE)/libc/intrin/pthread_mutex_unlock.o \

View file

@ -126,6 +126,7 @@ void *pthread_getspecific(pthread_key_t);
!atomic_exchange(&(mutex)->lock, 1)) \
? 0 \
: pthread_mutex_lock(mutex))
/*
#define pthread_mutex_unlock(mutex) \
((mutex)->attr == PTHREAD_MUTEX_NORMAL \
? (atomic_store_explicit(&(mutex)->lock, 0, memory_order_relaxed), \
@ -134,6 +135,7 @@ void *pthread_getspecific(pthread_key_t);
_pthread_mutex_wake(mutex)), \
0) \
: pthread_mutex_unlock(mutex))
*/
#endif
int _pthread_mutex_wake(pthread_mutex_t *) hidden;

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/errno.h"
#include "libc/intrin/pthread.h"
#include "libc/str/str.h"
@ -24,6 +25,12 @@
* @return 0 on success, or error number on failure
*/
int pthread_mutex_destroy(pthread_mutex_t *mutex) {
  int rc;
  // a mutex that is still held, or that still has registered waiters,
  // is reported as an error instead of being torn down silently
  if (mutex->lock || mutex->waits) {
    rc = EDEADLK;
  } else {
    rc = 0;
  }
  // the object is cleared in either case
  bzero(mutex, sizeof(*mutex));
  // hand back the status computed above; it used to be discarded in
  // favor of an unconditional zero
  return rc;
}

View file

@ -21,6 +21,7 @@
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/futex.internal.h"
#include "libc/intrin/pthread.h"
#include "libc/intrin/spinlock.h"
#include "libc/linux/futex.h"
@ -28,38 +29,15 @@
#include "libc/sysv/consts/futex.h"
#include "libc/sysv/consts/nr.h"
// Issues the futex system call with FUTEX_WAIT.
//
// addr goes in %rdi, expect in %rdx, and timeout is copied into %r10.
// Returns what the kernel left in %eax, negated if the carry flag was
// set when the syscall returned.
static inline int FutexWait(void *addr, int expect, struct timespec *timeout) {
int ax;
bool cf;
// carry is cleared first so `cf` only reflects the syscall itself
asm volatile(CFLAG_ASM("mov\t%6,%%r10\n\t"
"clc\n\t"
"syscall")
: CFLAG_CONSTRAINT(cf), "=a"(ax)
: "1"(__NR_futex), "D"(addr), "S"(FUTEX_WAIT), "d"(expect),
"g"(timeout)
: "rcx", "r10", "r11", "memory");
if (cf) ax = -ax;
return ax;
}
static int pthread_mutex_lock_spin(pthread_mutex_t *mutex, int tries) {
volatile int i;
struct timespec ts;
if (tries < 7) {
for (i = 0; i != 1 << tries; i++) {
}
tries++;
} else if (IsLinux() || IsOpenbsd()) {
atomic_fetch_add(&mutex->waits, 1);
if (tries < 28) {
ts.tv_sec = 0;
ts.tv_nsec = 4 << tries;
tries++;
} else {
ts.tv_sec = 1;
ts.tv_nsec = 0;
}
FutexWait(&mutex->lock, 1, &ts);
_futex_wait(&mutex->lock, 1, &(struct timespec){1});
atomic_fetch_sub(&mutex->waits, 1);
} else {
sched_yield();

View file

@ -41,7 +41,7 @@ int(pthread_mutex_unlock)(pthread_mutex_t *mutex) {
case PTHREAD_MUTEX_NORMAL:
atomic_store_explicit(&mutex->lock, 0, memory_order_relaxed);
if ((IsLinux() || IsOpenbsd()) &&
atomic_load_explicit(&mutex->waits, memory_order_relaxed)) {
atomic_load_explicit(&mutex->waits, memory_order_relaxed) > 0) {
_pthread_mutex_wake(mutex);
}
return 0;

View file

@ -16,23 +16,9 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/asmflag.h"
#include "libc/intrin/futex.internal.h"
#include "libc/intrin/pthread.h"
#include "libc/sysv/consts/futex.h"
#include "libc/sysv/consts/nr.h"
// Issues the futex system call with FUTEX_WAKE.
//
// addr goes in %rdi and count in %rdx. Returns what the kernel left in
// %eax, negated if the carry flag was set when the syscall returned.
static inline int FutexWake(void *addr, int count) {
int ax;
bool cf;
// carry is cleared first so `cf` only reflects the syscall itself
asm volatile(CFLAG_ASM("clc\n\t"
"syscall")
: CFLAG_CONSTRAINT(cf), "=a"(ax)
: "1"(__NR_futex), "D"(addr), "S"(FUTEX_WAKE), "d"(count)
: "rcx", "r11", "memory");
if (cf) ax = -ax;
return ax;
}
// Issues a futex wake on the mutex's lock word with a count of 1.
int _pthread_mutex_wake(pthread_mutex_t *mutex) {
return FutexWake(&mutex->lock, 1);
// NOTE(review): unreachable as written; this looks like the intended
// replacement for the FutexWake() call above — confirm which of the
// two statements should remain
return _futex_wake(&mutex->lock, 1);
}

View file

@ -30,6 +30,7 @@ sched_yield:
testb IsXnu()
jz 1f
pause
xor %eax,%eax
ret
#endif

View file

@ -19,6 +19,7 @@
#include "libc/bits/atomic.h"
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/intrin/futex.internal.h"
#include "libc/intrin/wait0.internal.h"
#include "libc/linux/futex.h"
@ -34,8 +35,8 @@ void _wait0(int *ptid) {
for (;;) {
if (!(x = atomic_load_explicit(ptid, memory_order_relaxed))) {
break;
} else if (IsLinux()) {
LinuxFutexWait(ptid, x, 0);
} else if (IsLinux() || IsOpenbsd()) {
_futex_wait(ptid, x, &(struct timespec){2});
} else {
sched_yield();
}

View file

@ -111,9 +111,9 @@ void(vflogf)(unsigned level, const char *file, int line, FILE *f,
if (bufmode == _IOLBF) f->bufmode = _IOFBF;
if ((fprintf_unlocked)(f, "%r%c%s%06ld:%s:%d:%.*s:%d] ",
"FEWIVDNT"[level & 7], buf32,
rem1000000int64(div1000int64(dots)), file, line,
strchrnul(prog, '.') - prog, prog, getpid()) <= 0) {
"FEWIVDNT"[level & 7], buf32, dots / 1000 % 1000000,
file, line, strchrnul(prog, '.') - prog, prog,
getpid()) <= 0) {
vflogf_onfail(f);
}
(vfprintf_unlocked)(f, fmt, va);

View file

@ -3,20 +3,6 @@
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/*
* BIT SCANNING 101
* ctz(𝑥) 31^clz(𝑥) clz(𝑥)
* uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
* 0x00000000 wut 32 0 wut 32
* 0x00000001 0 0 1 0 31
* 0x80000001 0 0 1 31 0
* 0x80000000 31 31 32 31 0
* 0x00000010 4 4 5 4 27
* 0x08000010 4 4 5 27 4
* 0x08000000 27 27 28 27 4
* 0xffffffff 0 0 1 31 0
*/
int bsf(int) pureconst;
int bsfl(long) pureconst;
int bsfll(long long) pureconst;

View file

@ -3,20 +3,6 @@
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/*
* BIT SCANNING 101
* ctz(𝑥) 31^clz(𝑥) clz(𝑥)
* uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
* 0x00000000 wut 32 0 wut 32
* 0x00000001 0 0 1 0 31
* 0x80000001 0 0 1 31 0
* 0x80000000 31 31 32 31 0
* 0x00000010 4 4 5 4 27
* 0x08000010 4 4 5 27 4
* 0x08000000 27 27 28 27 4
* 0xffffffff 0 0 1 31 0
*/
int bsr(int) pureconst;
int bsrl(long) pureconst;
int bsrll(long long) pureconst;

View file

@ -1,12 +1,13 @@
#ifndef COSMOPOLITAN_LIBC_NEXGEN32E_CACHESIZE_H_
#define COSMOPOLITAN_LIBC_NEXGEN32E_CACHESIZE_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
#define kCpuCacheTypeData 1
#define kCpuCacheTypeInstruction 2
#define kCpuCacheTypeUnified 3
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
unsigned getcachesize(int, int);
COSMOPOLITAN_C_END_

View file

@ -1,30 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Divides 64-bit signed integer by 1,000,000,000.
//
// Only loads two constants and then jumps to tinydivsi, which is
// defined elsewhere and produces the result.
//
// @param rdi is number to divide
// @return quotient
div1000000000int64:
// NOTE(review): %cl and %rdx look like the shift count and the
// multiplier consumed by tinydivsi (rem1000000000int64 uses this same
// constant with imul and `sar $0x1a`) — confirm against tinydivsi
mov $0x1a,%cl
movabs $0x112e0be826d694b3,%rdx
// jmp rather than call: no return address is pushed here
jmp tinydivsi
.globl tinydivsi
.endfn div1000000000int64,globl

View file

@ -1,29 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Divides 64-bit signed integer by 1,000,000.
//
// Only loads two constants and then jumps to tinydivsi, which is
// defined elsewhere and produces the result.
//
// @param rdi is number to divide
// @return quotient
div1000000int64:
// NOTE(review): %cl and %rdx look like the shift count and the
// multiplier consumed by tinydivsi (rem1000000int64 uses this same
// constant with imul and `sar $0x12`) — confirm against tinydivsi
mov $0x12,%cl
movabs $0x431bde82d7b634db,%rdx
// jmp rather than call: no return address is pushed here
jmp tinydivsi
.endfn div1000000int64,globl

View file

@ -1,29 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Divides 64-bit signed integer by 10,000.
//
// Only loads two constants and then jumps to tinydivsi, which is
// defined elsewhere and produces the result.
//
// @param rdi is number to divide
// @return truncated quotient
div10000int64:
// NOTE(review): %cl and %rdx look like the shift count and the
// multiplier consumed by tinydivsi (rem10000int64 uses this same
// constant with imul and `sar $11`) — confirm against tinydivsi
mov $11,%cl
movabs $0x346dc5d63886594b,%rdx
// jmp rather than call: no return address is pushed here
jmp tinydivsi
.endfn div10000int64,globl

View file

@ -1,34 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Divides 64-bit signed integer by 100.
//
// Done inline with a multiply and shifts instead of a divide
// instruction.
//
// @param rdi is number to divide
// @return rax has quotient
div100int64:
mov %rdi,%rax
movabs $-6640827866535438581,%rdx
// signed 128-bit product of the input and the constant in %rdx:%rax
imul %rdx
// high half of the product plus the original input
lea (%rdx,%rdi),%rax
// %rdi becomes -1 if the input was negative, otherwise 0
sar $63,%rdi
sar $6,%rax
// subtracting the sign mask adds one for negative inputs
sub %rdi,%rax
ret
.endfn div100int64,globl

View file

@ -1,29 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Divides 64-bit signed integer by 10.
//
// Only loads two constants and then jumps to tinydivsi, which is
// defined elsewhere and produces the result.
//
// @param rdi is number to divide
// @return quotient
div10int64:
// NOTE(review): %cl and %rdx look like the shift count and the
// multiplier consumed by tinydivsi (rem10int64 uses this same
// constant with imul and `sar $0x2`) — confirm against tinydivsi
mov $2,%cl
movabs $0x6666666666666667,%rdx
// jmp rather than call: no return address is pushed here
jmp tinydivsi
.endfn div10int64,globl

View file

@ -1 +0,0 @@
These files aren't intended to be compiled.

View file

@ -53,7 +53,19 @@ kCpuids:.long 0,0,0,0 # EAX=0 (Basic Processor Info)
mov %rdi,%r8
xor %eax,%eax
1: xor %ecx,%ecx
#ifdef FEATURELESS
// It's been reported that GDB reverse debugging doesn't
// understand VEX encoding. The workaround is to put:
//
// CPPFLAGS = -DFEATURELESS
//
// Inside your ~/.cosmo.mk file.
xor %eax,%eax
xor %ebx,%ebx
xor %edx,%edx
#else
cpuid
#endif
stosl
xchg %eax,%ebx
stosl

View file

@ -1,48 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Creates bit mask of which bytes are the same.
//
// Each loop iteration compares 32 bytes of both buffers using two
// 16-byte pcmpeqb operations and stores the two resulting 16-bit
// pmovmskb masks, so one mask bit is produced per compared byte.
//
// @param %rdi points to bit mask (write-only)
// @param %rsi points to first buffer (read-only)
// @param %rdx points to second buffer (read-only)
// @param %rcx is byte length of both %rsi and %rdx
// @return %rax is set to %rdi
// @note buffers should be 128-byte aligned
// NOTE(review): the loads below are 16-byte (128-bit) movdqa accesses;
// the figure in the note above may have been meant as 128-bit — confirm
memeqmask:
.leafprologue
// %eax is the byte offset into the mask; scaled by 8 it doubles as
// the byte offset into both input buffers
xor %eax,%eax
// nothing to do for a zero length
test %ecx,%ecx
jz 1f
// %ecx = length / 8, i.e. the number of mask bytes to produce
shr $3,%ecx
0: movdqa (%rsi,%rax,8),%xmm0
movdqa 16(%rsi,%rax,8),%xmm1
pcmpeqb (%rdx,%rax,8),%xmm0
pcmpeqb 16(%rdx,%rax,8),%xmm1
pmovmskb %xmm0,%r8d
pmovmskb %xmm1,%r9d
mov %r8w,(%rdi,%rax)
mov %r9w,2(%rdi,%rax)
// advance past the four mask bytes just written
add $4,%eax
cmp %ecx,%eax
jb 0b
// return the mask pointer
1: mov %rdi,%rax
.leafepilogue
.endfn memeqmask,globl

View file

@ -1,62 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/nexgen32e/x86feature.h"
#include "libc/dce.h"
#include "libc/macros.internal.h"
// Searches for last instance of uint16_t in memory region.
//
// When not built tiny, an AVX2 loop first walks backwards in chunks
// of 16 elements to narrow down the position; a scalar loop then
// scans backwards one element at a time to locate the exact match.
//
// @param rdi points to data to search
// @param esi is treated as uint16_t
// @param rdx is short count in rdi
// @return rax is address of last %si in %rdi, or NULL
// @note AVX2 requires Haswell (2014+) or Excavator (2015+)
memrchr16:
.leafprologue
.profilable
#if !IsTiny()
// the vector path needs at least 16 elements and the AVX2 feature bit
cmp $16,%rdx
jb 5f
testb X86_HAVE(AVX2)+kCpuids(%rip)
jz 5f
// replicate the 16-bit needle across all of %ymm0
vmovd %esi,%xmm0
vpbroadcastw %xmm0,%ymm0
// load the last 32 bytes of the region that is still unsearched
3: vmovdqu -32(%rdi,%rdx,2),%ymm1
vpcmpeqw %ymm1,%ymm0,%ymm1
vpmovmskb %ymm1,%eax
// leading zero mask bits are non-matching bytes at the tail of the
// chunk; halving converts that byte count to an element count
lzcnt %eax,%eax
shr %eax
mov %eax,%ecx
sub %rcx,%rdx
// fewer than 16 skipped elements means this chunk holds a match, so
// hand over to the scalar loop
// NOTE(review): this exit jumps past the vzeroupper below — confirm
// whether that is intended
cmp $16,%eax
jne 5f
cmp $15,%rdx
ja 3b
vzeroupper
#endif
// scalar tail: %rax starts out as NULL for the not-found case
5: xor %eax,%eax
mov %rdx,%rcx
6: sub $1,%rcx
// borrow means the count ran out without a match
jb 9f
cmp %si,-2(%rdi,%rdx,2)
mov %rcx,%rdx
jne 6b
lea (%rdi,%rcx,2),%rax
9: .leafepilogue
.endfn memrchr16,globl

View file

@ -1,62 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/nexgen32e/x86feature.h"
#include "libc/dce.h"
#include "libc/macros.internal.h"
// Searches for last instance of wchar_t in memory region.
//
// When not built tiny, an AVX2 loop first walks backwards in chunks
// of 8 elements to narrow down the position; a scalar loop then scans
// backwards one element at a time to locate the exact match.
//
// @param rdi points to data to search
// @param esi is treated as int32_t (officially wchar_t)
// @param rdx is short count in rdi
// @return rax is address of last %esi in %rdi, or NULL
// @note AVX2 requires Haswell (2014+) or Excavator (2015+)
wmemrchr:
.leafprologue
.profilable
#if !IsTiny()
// the vector path needs at least 8 elements and the AVX2 feature bit
cmp $8,%rdx
jb 5f
testb X86_HAVE(AVX2)+kCpuids(%rip)
jz 5f
// replicate the 32-bit needle across all of %ymm0
vmovd %esi,%xmm0
vpbroadcastd %xmm0,%ymm0
// load the last 32 bytes of the region that is still unsearched
3: vmovdqu -32(%rdi,%rdx,4),%ymm1
vpcmpeqd %ymm1,%ymm0,%ymm1
vpmovmskb %ymm1,%eax
// leading zero mask bits are non-matching bytes at the tail of the
// chunk; dividing by four converts that byte count to elements
lzcnt %eax,%eax
shr $2,%eax
mov %eax,%ecx
sub %rcx,%rdx
// fewer than 8 skipped elements means this chunk holds a match, so
// hand over to the scalar loop
// NOTE(review): this exit jumps past the vzeroupper below — confirm
// whether that is intended
cmp $8,%eax
jne 5f
cmp $7,%rdx
ja 3b
vzeroupper
#endif
// scalar tail: %rax starts out as NULL for the not-found case
5: xor %eax,%eax
mov %rdx,%rcx
6: sub $1,%rcx
// borrow means the count ran out without a match
jb 9f
cmp %esi,-4(%rdi,%rdx,4)
mov %rcx,%rdx
jne 6b
lea (%rdi,%rcx,4),%rax
9: .leafepilogue
.endfn wmemrchr,globl

View file

@ -10,23 +10,6 @@ void imapxlatab(void *);
void insertionsort(int32_t *, size_t);
void CheckStackIsAligned(void);
int64_t div10int64(int64_t) libcesque pureconst;
int64_t div100int64(int64_t) libcesque pureconst;
int64_t div1000int64(int64_t) libcesque pureconst;
int64_t div10000int64(int64_t) libcesque pureconst;
int64_t div1000000int64(int64_t) libcesque pureconst;
int64_t div1000000000int64(int64_t) libcesque pureconst;
int64_t rem10int64(int64_t) libcesque pureconst;
int64_t rem100int64(int64_t) libcesque pureconst;
int64_t rem1000int64(int64_t) libcesque pureconst;
int64_t rem10000int64(int64_t) libcesque pureconst;
int64_t rem1000000int64(int64_t) libcesque pureconst;
int64_t rem1000000000int64(int64_t) libcesque pureconst;
char sbb(uint64_t *, const uint64_t *, const uint64_t *, size_t);
char adc(uint64_t *, const uint64_t *, const uint64_t *, size_t);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_NEXGEN32E_NEXGEN32E_H_ */

View file

@ -1,38 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Returns 𝑥 % 1,000,000,000.
//
// Computes a quotient with a multiply and shifts, multiplies it back
// by 0x3b9aca00 (1,000,000,000), and subtracts that from 𝑥.
//
// @param rdi int64 𝑥
// @return rax has remainder
rem1000000000int64:
movabs $0x112e0be826d694b3,%rdx
mov %rdi,%rax
// signed 128-bit product of 𝑥 and the constant in %rdx:%rax
imul %rdx
// keep the high half and shift it down
mov %rdx,%rax
sar $0x1a,%rax
// %rdx becomes -1 if 𝑥 is negative, otherwise 0
mov %rdi,%rdx
sar $0x3f,%rdx
// subtracting the sign mask adds one for negative 𝑥
sub %rdx,%rax
// remainder = 𝑥 - quotient * 1,000,000,000
imul $0x3b9aca00,%rax,%rax
sub %rax,%rdi
mov %rdi,%rax
ret
.endfn rem1000000000int64,globl

View file

@ -1,38 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Returns 𝑥 % 1,000,000.
//
// Computes a quotient with a multiply and shifts, multiplies it back
// by 0xf4240 (1,000,000), and subtracts that from 𝑥.
//
// @param rdi int64 𝑥
// @return rax has remainder
rem1000000int64:
movabs $0x431bde82d7b634db,%rdx
mov %rdi,%rax
// signed 128-bit product of 𝑥 and the constant in %rdx:%rax
imul %rdx
// keep the high half and shift it down
mov %rdx,%rax
sar $0x12,%rax
// %rdx becomes -1 if 𝑥 is negative, otherwise 0
mov %rdi,%rdx
sar $0x3f,%rdx
// subtracting the sign mask adds one for negative 𝑥
sub %rdx,%rax
// remainder = 𝑥 - quotient * 1,000,000
imul $0xf4240,%rax,%rax
sub %rax,%rdi
mov %rdi,%rax
ret
.endfn rem1000000int64,globl

View file

@ -1,38 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Returns 𝑥 % 10,000.
//
// Computes a quotient with a multiply and shifts, multiplies it back
// by 10000, and subtracts that from 𝑥.
//
// @param rdi int64 𝑥
// @return rax has remainder
rem10000int64:
mov %rdi,%rax
movabsq $0x346dc5d63886594b,%rdx
// signed 128-bit product of 𝑥 and the constant in %rdx:%rax
imulq %rdx
// keep the high half; %rdx is reused for the sign mask of 𝑥
mov %rdx,%rax
mov %rdi,%rdx
sar $11,%rax
// %rdx becomes -1 if 𝑥 is negative, otherwise 0
sar $63,%rdx
// subtracting the sign mask adds one for negative 𝑥
sub %rdx,%rax
// remainder = 𝑥 - quotient * 10,000
imulq $10000,%rax,%rax
sub %rax,%rdi
mov %rdi,%rax
ret
.endfn rem10000int64,globl

View file

@ -1,38 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Returns 𝑥 % 1,000.
//
// Computes a quotient with a multiply and shifts, multiplies it back
// by 0x3e8 (1,000), and subtracts that from 𝑥.
//
// @param rdi int64 𝑥
// @return rax has remainder
rem1000int64:
movabs $0x20c49ba5e353f7cf,%rdx
mov %rdi,%rax
// signed 128-bit product of 𝑥 and the constant in %rdx:%rax
imul %rdx
// keep the high half and shift it down
mov %rdx,%rax
sar $0x7,%rax
// %rdx becomes -1 if 𝑥 is negative, otherwise 0
mov %rdi,%rdx
sar $0x3f,%rdx
// subtracting the sign mask adds one for negative 𝑥
sub %rdx,%rax
// remainder = 𝑥 - quotient * 1,000
imul $0x3e8,%rax,%rax
sub %rax,%rdi
mov %rdi,%rax
ret
.endfn rem1000int64,globl

View file

@ -1,38 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Returns 𝑥 % 100.
//
// Computes a quotient with a multiply and shifts, multiplies it back
// by 100, and subtracts that from 𝑥.
//
// @param rdi int64 𝑥
// @return rax has remainder
rem100int64:
mov %rdi,%rax
movabsq $-6640827866535438581,%rdx
// signed 128-bit product of 𝑥 and the constant in %rdx:%rax
imul %rdx
// high half of the product plus 𝑥 itself
lea (%rdx,%rdi),%rax
mov %rdi,%rdx
sar $6,%rax
// %rdx becomes -1 if 𝑥 is negative, otherwise 0
sar $63,%rdx
// subtracting the sign mask adds one for negative 𝑥
sub %rdx,%rax
// remainder = 𝑥 - quotient * 100
imul $100,%rax,%rax
sub %rax,%rdi
mov %rdi,%rax
ret
.endfn rem100int64,globl

View file

@ -1,39 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Returns 𝑥 % 10.
//
// Computes a quotient with a multiply and shifts, multiplies it back
// by ten using lea and add, and subtracts that from 𝑥.
//
// @param rdi int64 𝑥
// @return rax has remainder
rem10int64:
movabs $0x6666666666666667,%rdx
mov %rdi,%rax
// signed 128-bit product of 𝑥 and the constant in %rdx:%rax
imul %rdx
// keep the high half and shift it down
mov %rdx,%rax
sar $0x2,%rax
// %rdx becomes -1 if 𝑥 is negative, otherwise 0
mov %rdi,%rdx
sar $0x3f,%rdx
// subtracting the sign mask adds one for negative 𝑥
sub %rdx,%rax
// quotient * 5, then doubled to give quotient * 10
lea (%rax,%rax,4),%rax
add %rax,%rax
// remainder = 𝑥 - quotient * 10
sub %rax,%rdi
mov %rdi,%rax
ret
.endfn rem10int64,globl

View file

@ -1,83 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
#include "libc/notice.inc"
// Applies no-clobber guarantee to System Five function call.
//
// - Reentrant
// - Realigns stack
// - Doesn't assume red zone
// - Clobbers nothing (except %rax and flags)
//
// This function may be called using an stdcall convention. It's
// useful for files named FOO.hookabi.c and BAR.ncabi.c to make
// calls into other parts of the system, that don't conform to the
// same restricted ABI.
//
// The seven slots the caller pushed are released by this function:
// after the call, the saved frame pointer and the return address are
// copied upward over the arg5 and arg6 slots, so the closing
// `pop %rbp; ret` leaves %rsp just above the caller's pushed slots.
//
// @param six args and fn addr pushed on stack in reverse order
// @return %rax has function return value, and stack is cleaned up
// @see libc/shadowargs.hook.c for intended use case
slowcall:
#param %r9 # 0x40 arg6
#param %r8 # 0x38 arg5
#param %rcx # 0x30 arg4
#param %rdx # 0x28 arg3
#param %rsi # 0x20 arg2
#param %rdi # 0x18 arg1
#param %rax # 0x10 call address
#param # 0x08 return address
push %rbp # 0x00 parent frame
mov %rsp,%rbp # ----
push %rdi #-0x08
push %rsi #-0x10
push %rdx #-0x18
push %rcx #-0x20
push %r8 #-0x28
push %r9 #-0x30
push %r10 #-0x38
push %r11 #-0x40
mov 0x10(%rbp),%rax # fetch call address
mov 0x18(%rbp),%rdi # load arg1 through arg6 into argument registers
mov 0x20(%rbp),%rsi
mov 0x28(%rbp),%rdx
mov 0x30(%rbp),%rcx
mov 0x38(%rbp),%r8
mov 0x40(%rbp),%r9
and $-16,%rsp # realign stack to 16 bytes before the call
call *%rax
push %rax # stash return value while %rax is used as scratch
mov 0x00(%rbp),%rax # copy saved parent frame pointer into arg5 slot
mov %rax,0x38(%rbp)
mov 0x08(%rbp),%rax # copy return address into arg6 slot
mov %rax,0x40(%rbp)
pop %rax # recover return value
lea -0x40(%rbp),%rsp # point %rsp at the block of saved registers
pop %r11
pop %r10
pop %r9
pop %r8
pop %rcx
pop %rdx
pop %rsi
pop %rdi
lea 0x38(%rbp),%rsp # skip the consumed slots up to the relocated frame
pop %rbp
ret
.endfn slowcall,globl

View file

@ -1,24 +0,0 @@
#ifndef COSMOPOLITAN_LIBC_NEXGEN32E_SLOWCALL_H_
#define COSMOPOLITAN_LIBC_NEXGEN32E_SLOWCALL_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/**
 * Calls `fn` with six arguments through the `slowcall` assembly routine.
 *
 * Operands are pushed in reverse order (arg6 first, `fn` last) and then
 * `slowcall` is invoked; the statement expression evaluates to whatever
 * is left in %rax, typed as `void *`. Only "memory" is declared as
 * clobbered.
 *
 * NOTE(review): the inputs use the "g" constraint, so the compiler may
 * presumably pick %rsp-relative memory operands, which would shift as
 * each `push` executes -- confirm against the compiler's inline asm
 * rules before reusing this pattern.
 */
#define slowcall(fn, arg1, arg2, arg3, arg4, arg5, arg6) \
({ \
void *ax; \
asm volatile("push\t%7\n\t" \
"push\t%6\n\t" \
"push\t%5\n\t" \
"push\t%4\n\t" \
"push\t%3\n\t" \
"push\t%2\n\t" \
"push\t%1\n\t" \
"call\tslowcall" \
: "=a"(ax) \
: "g"(fn), "g"(arg1), "g"(arg2), "g"(arg3), "g"(arg4), \
"g"(arg5), "g"(arg6) \
: "memory"); \
ax; \
})
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_NEXGEN32E_SLOWCALL_H_ */

View file

@ -1,33 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// TODO(jart): pmovzxbw and vpunpcklbw
// Copies NUL-terminated byte string into an array of 16-bit words,
// zero-extending each byte. The terminating NUL is copied as well.
//
// @param rdi is destination array of 16-bit words
// @param rsi is NUL-terminated source byte string
// @return rax is the original rdi
strcpyzbw:
.leafprologue
.profilable
push %rdi // remember destination for the return value
xor %eax,%eax // clear %ah so every stored word is zero-extended
1: lodsb // %al = next source byte, %rsi advances (assumes DF is clear)
stosw // store %ax to destination, %rdi advances by two
test %al,%al
jnz 1b // loop until the NUL itself has been stored
pop %rax // return original destination pointer
.leafepilogue
.endfn strcpyzbw,globl

View file

@ -1,406 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/nexgen32e/x86feature.h"
#include "libc/nexgen32e/macros.h"
#include "libc/macros.internal.h"
// Returns length of NUL-terminated string w/ security blankets.
//
// This is like strnlen() except it'll return 0 if (1) RDI is NULL
// or (2) a NUL-terminator wasn't found in the first RSI bytes.
//
// This entry point only sets up registers; the scan itself is done by
// the strsak routine that follows, which is entered by falling
// through the end of this function.
//
// @param rdi is a nullable NUL-terminated string pointer
// @param rsi is the maximum number of bytes to consider
// @return rax is the number of bytes, excluding the NUL
strnlen_s:
.leafprologue
.profilable
xor %eax,%eax // result is 0 if we bail out on a null pointer
xor %r10d,%r10d // r10 = 0 so strsak masks result to 0 when bytes run out
test %rdi,%rdi
jnz 0f
.leafepilogue // null pointer case; presumably returns to caller here
0: xor %edx,%edx // dl = dh = 0, i.e. both search characters are NUL
mov %rdi,%r8 // r8 = start pointer so strsak yields a length
// 𝑠𝑙𝑖𝑑𝑒 (execution continues into strsak)
.endfn strnlen_s,globl
// Swiss army knife of string character scanning.
// Used to be fourteen fast functions in one.
//
// Bytes are examined one at a time until the cursor is 32-byte
// aligned, then 32 bytes per iteration using AVX2, or two 16-byte
// SSE2 loads when the AVX2 feature bit is not set. The vector loops
// only locate a candidate offset; the final decision is always made
// by the byte-wise comparison at .Lbyte.
//
// @param rdi is non-null string memory
// @param rsi is max number of bytes to consider
// @param dl is search character #1
// @param dh is search character #2
// @param r8 is subtracted from result (for length vs. pointer)
// @param r9 masks result if DH is found (for NUL vs. NULL)
// @param r10 masks result on bytes exhausted (for length v. NULL)
// @return rax end pointer after r8/r9/r10 modifications
strsak: lea -1(%rdi),%rax // cursor starts one before the first byte
1: add $1,%rax // advance cursor
sub $1,%rsi // consume one byte of budget
jb .Lend // borrow means the budget is exhausted
test $31,%al
jz .Lfast // cursor is 32-byte aligned, switch to vector loop
.Lbyte: mov (%rax),%cl
cmp %cl,%dl
je .Ldone // found search character #1
cmp %cl,%dh
je .Lnul // found search character #2
jmp 1b
.Ldone: sub %r8,%rax // character #1 result is not masked
jmp .Lret
.Lend: mov %r10,%r9 // exhausted: mask with r10 instead of r9
.Lnul: sub %r8,%rax
and %r9,%rax
.Lret: .leafepilogue
.Lslow: add $32,%rsi // undo the failed 32-byte budget subtraction
jmp .Lbyte // and finish the tail one byte at a time
.Lfast: movzbl %dl,%ecx
movd %ecx,%xmm0 // xmm0 low byte = search character #1
movzbl %dh,%ecx
movd %ecx,%xmm1 // xmm1 low byte = search character #2
sub $32,%rax // compensate for the add at the top of the loop
#if !X86_NEED(AVX2)
testb X86_HAVE(AVX2)+kCpuids(%rip) // runtime check of AVX2 feature bit
jz .Lsse2
#endif
vpbroadcastb %xmm0,%ymm0 // replicate each search character to all 32 lanes
vpbroadcastb %xmm1,%ymm1
1: add $32,%rax
sub $32,%rsi
9: jb .Lslow // fewer than 32 bytes of budget remain
vmovdqa (%rax),%ymm2 // aligned 32-byte load
vpcmpeqb %ymm0,%ymm2,%ymm3
vpcmpeqb %ymm1,%ymm2,%ymm2
vpor %ymm3,%ymm2,%ymm2 // lanes matching either search character
vpmovmskb %ymm2,%ecx // one mask bit per byte
bsf %ecx,%ecx // index of first match; ZF set if mask was zero
je 1b // no match in this chunk, keep going
vzeroupper
2: add %rcx,%rax // move cursor to the candidate byte
jmp .Lbyte // let the byte path classify it
#if !X86_NEED(AVX2)
.Lsse2: pbroadcastb %xmm0 // presumably a helper macro replicating the low byte
pbroadcastb %xmm1
1: add $32,%rax
sub $32,%rsi
jb 9b // 9b is itself a jb, so this reaches .Lslow
movdqa (%rax),%xmm2 // lower 16 bytes of the chunk
movdqa 16(%rax),%xmm3 // upper 16 bytes of the chunk
movdqa %xmm3,%xmm4
pcmpeqb %xmm0,%xmm3
pcmpeqb %xmm1,%xmm4
por %xmm4,%xmm3
pmovmskb %xmm3,%ecx
shl $16,%ecx // upper half occupies mask bits 16..31
movdqa %xmm2,%xmm4
pcmpeqb %xmm0,%xmm2
pcmpeqb %xmm1,%xmm4
por %xmm4,%xmm2
pmovmskb %xmm2,%r11d
or %r11d,%ecx // combined 32-bit match mask
bsf %ecx,%ecx
je 1b // no match in this chunk
jmp 2b
#endif
.endfn strsak,globl,hidden
/* benchmarked on intel core i7-6700 @ 3.40GHz (skylake)
includes function call overhead (unless marked otherwise)
your strlen, &c (strsak+avx2) for #c per n where c 0.293ns
N x1 x8 x64 mBps
------------------------------------------------------------
1 47.000 36.375 35.141 99
1 35.000 34.625 36.234 96
2 31.500 18.812 18.992 184
3 19.667 13.042 13.182 265
4 30.750 10.281 10.285 339
7 15.857 8.946 7.551 462
8 12.125 9.203 7.119 490
15 10.467 5.475 4.601 758
16 6.812 5.523 4.798 727
31 5.387 4.327 3.517 992
32 4.719 1.645 1.532 2278
63 5.000 2.403 2.034 1715
64 2.047 0.779 0.788 4427
127 2.134 1.194 1.027 3399
128 1.742 0.444 0.419 8327
255 0.945 0.594 0.554 6295
256 0.574 0.271 0.264 13226
511 0.785 0.362 0.307 11384
512 0.326 0.178 0.151 23134
1023 0.288 0.242 0.185 18862
1024 0.208 0.114 0.107 32565
2047 0.235 0.127 0.123 28430
2048 0.127 0.090 0.084 41413
4095 0.119 0.106 0.099 35116
4096 0.100 0.081 0.079 44372
8191 0.092 0.082 0.081 43176
8192 0.081 0.072 0.071 49419
16383 0.076 0.072 0.071 48847
16384 0.071 0.068 0.067 52381
32767 0.072 0.069 0.068 51154
32768 0.068 0.066 0.065 53409
your tinystrlen()
N x1 x8 x64 mBps
------------------------------------------------------------
1 53.000 33.625 33.672 97
1 33.000 32.125 32.234 101
2 24.500 19.438 17.711 184
3 23.667 12.875 11.911 273
4 13.750 9.281 9.238 352
7 11.000 6.125 5.801 560
8 7.625 5.609 5.232 621
15 11.800 3.825 3.364 966
16 4.562 3.648 3.173 1024 « optimal
31 3.710 2.851 2.298 1414
32 3.031 2.254 2.159 1506 « dropoff
63 2.683 1.827 1.691 1922
64 2.078 1.932 1.689 1924
127 1.630 1.647 1.622 2004
128 1.727 1.671 1.652 1968
255 1.392 1.450 1.435 2265
256 1.473 1.427 1.437 2262
511 1.325 1.353 1.337 2431
512 1.408 1.343 1.337 2431
1023 1.289 1.281 1.287 2525
1024 1.269 1.295 1.297 2506
2047 1.269 1.274 1.269 2561
2048 1.280 1.263 1.281 2538
4095 1.262 1.270 1.266 2568
4096 1.270 1.264 1.265 2570
8191 1.253 1.254 1.254 2592
8192 1.219 1.224 1.225 2653
16383 1.225 1.222 1.220 2663
16384 1.226 1.221 1.222 2659
32767 1.227 1.224 1.223 2658
32768 1.220 1.221 1.222 2659
glibc strlen for #c per n where c 0.273ns
N x1 x8 x64 mBps
------------------------------------------------------------
1 3497.000 53.125 42.641 82
1 69.000 44.875 42.547 82
2 45.500 24.188 21.852 160
3 23.000 15.625 14.557 240
4 22.250 11.406 10.637 328
7 10.143 6.768 6.230 560
8 11.125 5.797 5.486 636
15 5.800 3.142 2.859 1220
16 7.062 3.070 2.737 1275
31 2.806 1.585 1.407 2481
32 3.156 1.574 1.349 2587
63 2.016 0.895 0.691 5049
64 1.328 0.744 0.670 5207
127 1.441 0.521 0.407 8577
128 0.648 0.454 0.405 8619
255 0.553 0.286 0.214 16277
256 0.387 0.235 0.218 15984
511 0.456 0.151 0.129 27077
512 0.182 0.134 0.129 27117
1023 0.171 0.106 0.082 42795
1024 0.112 0.088 0.082 42741
2047 0.099 0.069 0.059 59537
2048 0.072 0.060 0.058 59925
4095 0.065 0.053 0.047 74122
4096 0.061 0.048 0.047 74478
8191 0.048 0.045 0.044 79117
8192 0.051 0.045 0.044 79181
16383 0.042 0.040 0.061 57018
16384 0.069 0.063 0.061 57245
32767 0.081 0.073 0.068 51426
32768 0.084 0.072 0.068 51285
GCC strlen (-Os REPNZ SCASB) for #c per n where c 0.293ns
N x1 x8 x64 mBps
------------------------------------------------------------
1 103.000 84.125 88.766 37
1 81.000 85.125 87.328 37
2 43.500 44.562 45.508 71
3 33.000 30.208 30.995 105
4 24.750 23.156 23.113 141
7 17.000 13.054 15.355 212
8 13.375 14.047 13.982 232
15 9.533 9.258 55.111 59
16 6.312 6.352 6.364 511
31 4.032 4.141 4.141 785
32 3.969 4.059 4.048 803
63 2.937 2.970 2.995 1086
64 2.922 2.939 2.956 1100
127 2.386 2.408 2.403 1353
128 2.383 2.403 2.401 1354
255 2.129 2.118 2.124 1530
256 2.137 2.133 2.130 1526
511 1.982 1.986 3.351 970
512 1.982 1.990 1.986 1637
1023 1.915 1.916 2.587 1257
1024 1.868 1.867 1.866 1742
2047 1.835 1.833 1.832 1775
2048 1.830 1.831 1.832 1775
4095 1.814 1.814 1.815 1791
4096 1.810 1.815 1.815 1791
8191 1.805 1.807 1.806 1800
8192 1.805 1.806 1.806 1800
16383 1.803 1.756 1.756 1851
16384 1.758 1.756 1.756 1851
32767 1.756 1.754 1.754 1853
32768 1.756 1.754 1.754 1853
Intel Optimz. Manual (SSE4.2) for #c per n where c 0.273ns
N x1 x8 x64 mBps
------------------------------------------------------------
1 37.000 43.125 34.078 102
1 33.000 33.875 34.016 103
2 39.500 17.188 17.555 199
3 18.333 12.208 12.036 290
4 30.250 9.344 9.137 382
7 14.429 5.732 5.766 605
8 7.875 6.797 5.354 652
15 10.733 5.825 3.516 993
16 3.812 2.383 2.325 1501
31 4.097 2.609 2.079 1678
32 3.031 1.395 1.349 2587
63 2.937 1.558 1.079 3235
64 2.016 0.893 0.690 5056
127 1.929 0.721 0.607 5745
128 0.617 0.483 0.428 8147
255 1.275 0.404 0.411 8486
256 0.480 0.319 0.299 11681
511 0.479 0.307 0.288 12127
512 0.322 0.244 0.232 15013
1023 0.324 0.224 0.225 15512
1024 0.245 0.240 0.223 15651
2047 0.222 0.213 0.206 16938
2048 0.204 0.194 0.192 18140
4095 0.204 0.188 0.185 18888
4096 0.183 0.179 0.179 19446
8191 0.179 0.176 0.174 20000
8192 0.174 0.172 0.171 20383
16383 0.171 0.170 0.169 20604
16384 0.169 0.169 0.168 20808
32767 0.213 0.225 0.267 13064
32768 0.231 0.215 0.220 15852
musl libc strlen for #c per n where c 0.273ns
N x1 x8 x64 mBps
------------------------------------------------------------
1 65.000 36.125 37.984 92
1 39.000 37.625 37.422 93
2 41.500 21.938 20.695 169
3 22.333 17.625 15.859 220
4 21.250 13.656 12.105 288
7 22.143 9.018 7.609 459
8 31.125 7.234 7.346 475
15 11.267 5.025 4.709 741
16 9.438 4.039 3.849 907
31 4.871 3.133 2.488 1402
32 5.219 2.246 2.039 1712
63 4.302 1.462 1.407 2479
64 2.109 1.428 1.155 3023
127 1.551 1.078 0.879 3971
128 1.742 0.903 0.760 4591
255 0.922 0.558 0.605 5764
256 0.934 0.575 0.537 6495
511 0.550 0.493 0.455 7674
512 0.646 0.490 0.426 8183
1023 0.550 0.439 0.425 8203
1024 0.472 0.421 0.408 8549
2047 0.507 0.334 0.373 9360
2048 0.403 0.426 0.409 8540
4095 0.391 0.240 0.236 14799
4096 0.238 0.222 0.221 15766
8191 0.225 0.223 0.221 15779
8192 0.225 0.214 0.215 16250
16383 0.212 0.212 0.210 16595
16384 0.209 0.210 0.211 16535
32767 0.214 0.208 0.205 17001
32768 0.207 0.207 0.291 12002
newlib strlen for #c per n where c 0.273ns
N x1 x8 x64 mBps
------------------------------------------------------------
1 33.000 34.625 34.141 102
1 33.000 34.125 33.984 103
2 58.500 18.562 17.508 199
3 16.333 12.792 12.016 290
4 19.250 9.219 9.215 379
7 17.571 6.089 5.685 614
8 16.625 5.078 5.432 642
15 8.467 4.042 3.207 1088
16 3.938 2.773 2.733 1277
31 3.645 1.673 1.598 2183
32 3.281 1.527 1.493 2338
63 2.619 1.042 0.895 3901
64 1.422 0.928 0.813 4294
127 0.984 0.718 0.561 6222
128 1.195 0.591 0.532 6558
255 0.600 0.404 0.397 8785
256 0.621 0.429 0.376 9280
511 0.346 0.311 0.306 11421
512 0.420 0.308 0.296 11776
1023 0.284 0.285 0.285 12237
1024 0.321 0.282 0.280 12456
2047 0.253 0.252 0.252 13864
2048 0.260 0.249 0.249 14012
4095 0.236 0.236 0.236 14811
4096 0.239 0.235 0.234 14906
8191 0.233 0.228 0.227 15371
8192 0.230 0.227 0.227 15397
16383 0.223 0.224 0.223 15638
16384 0.223 0.224 0.223 15663
32767 0.224 0.387 0.225 15527
32768 0.223 0.222 0.222 15724
Agner Fog's strlen (SSE2) for #c per n where c 0.273ns
N x1 x8 x64 mBps
------------------------------------------------------------
1 59.000 38.375 38.453 91
1 37.000 38.625 38.234 91
2 18.500 19.062 19.273 181
3 13.000 12.792 12.859 271
4 9.250 9.594 9.660 361
7 5.286 5.554 5.502 634
8 4.625 4.703 4.791 728
15 2.600 2.858 2.622 1331
16 2.438 2.414 2.421 1442
31 2.161 1.399 1.290 2706
32 1.219 1.262 1.250 2793
63 1.508 0.875 0.693 5038
64 0.641 0.654 0.655 5328
127 1.205 0.406 0.379 9200
128 0.367 0.372 0.369 9463
255 0.467 0.310 0.235 14835
256 0.230 0.232 0.232 15034
511 0.272 0.181 0.159 21918
512 0.174 0.161 0.158 22148
1023 0.175 0.134 0.120 29043
1024 0.140 0.122 0.120 29005
2047 0.128 0.114 0.112 31205
2048 0.130 0.113 0.112 31242
4095 0.105 0.098 0.097 35984
4096 0.105 0.098 0.097 35973
8191 0.093 0.090 0.090 38953
8192 0.094 0.090 0.090 38986
16383 0.088 0.086 0.086 40648
16384 0.088 0.086 0.086 40652
32767 0.088 0.086 0.085 40956
32768 0.087 0.085 0.085 41114 */

View file

@ -1,41 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
#include "libc/nexgen32e/pcmpstr.inc"
#include "libc/nexgen32e/strstr.inc"
// TODO(jart): Fix me.
// Searches for substring using the SSE4.2 pcmpistri instruction.
//
// The needle is loaded 16 bytes at a time with movaps, which requires
// a 16-byte aligned address. The register roles below are read off
// the code; the .Lequalordered mode constant is defined elsewhere
// (presumably in pcmpstr.inc).
//
// @param rdi is NUL-terminated haystack string
// @param rsi is NUL-terminated needle string (16-byte aligned)
// @return rax is pointer to substring or NULL
strstr_sse42:
.leafprologue
mov %rdi,%rax // rax = candidate match position in haystack
xor %ecx,%ecx
0: mov $-16,%rdx // restart needle at its first 16-byte chunk
1: add $16,%rdx // rdx = byte offset of current needle chunk
movaps (%rsi,%rdx),%xmm0
2: add %rcx,%rax // advance candidate by index from last compare
lea (%rax,%rdx),%rdi // haystack bytes that line up with this chunk
pcmpistri $.Lequalordered,(%rdi),%xmm0
3: ja 2b # !CF (no match) && !ZF (need NUL-term)
jnc 4f # !CF (no match) && ZF (NUL-terminator)
jno 0b # !OF ← CF && CX!=0 (matched at offset)
jns 1b # !SF ← NUL ∉ XMM1 (need to match more)
jmp 5f # youtu.be/nVk1DjMtLWs
4: xor %eax,%eax // not found: return NULL
5: .leafepilogue
.endfn strstr_sse42,globl,hidden

View file

@ -1,68 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-
│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
╚─────────────────────────────────────────────────────────────────────────────*/
/* clang-format off */
// Searches for substring.
//
// This is a macro body; each expansion supplies the pcmpistri mode
// immediate. Aligned haystack positions are compared directly.
// Unaligned positions go through label 6, and positions within the
// last 15 bytes of a 4096-byte page additionally go through label 8,
// which inspects an aligned copy on the stack before the unaligned
// load is attempted.
//
// @param mode is the pcmpistri control immediate
// @param rdi is NUL-terminated haystack string
// @param rsi is NUL-terminated needle string (16-byte aligned)
// @return rax is pointer to substring or NULL
// @todo 10x faster than naïve but could be 100x faster
.macro .strstr mode:req
push %rbp
mov %rsp,%rbp
.profilable
sub $32,%rsp # scratch space used by the label 8 path
mov %rdi,%rax # rax = candidate match position in haystack
xor %ecx,%ecx
0: mov $-16,%rdx # restart needle at its first 16-byte chunk
1: add $16,%rdx # rdx = byte offset of current needle chunk
movaps (%rsi,%rdx),%xmm0
2: add %rcx,%rax # advance candidate by index from last compare
lea (%rax,%rdx),%rdi # haystack bytes that line up with this chunk
test $15,%edi
jnz 6f # haystack position is not 16-byte aligned
pcmpistri $\mode,(%rdi),%xmm0
3: ja 2b # !CF (no match) && !ZF (need NUL-term)
jnc 4f # !CF (no match) && ZF (NUL-terminator)
jno 0b # !OF ← CF && CX!=0 (matched at offset)
jns 1b # !SF ← NUL ∉ XMM1 (need to match more)
jmp 5f # youtu.be/nVk1DjMtLWs
4: xor %eax,%eax # not found: return NULL
5: leave
ret
6: mov %rdi,%r9 # same w/ pointer realign
and $15,%r9d # r9 = misalignment within 16 bytes
mov %edi,%r8d
and $0xfff,%r8d # r8 = offset within 4096-byte page
cmp $0xff0,%r8d
ja 8f # a 16-byte load here would cross the page end
7: pcmpistri $\mode,(%rdi),%xmm0
cmova %r9d,%ecx # no match and no NUL: step by r9 instead of ecx
jmp 3b # cmova leaves flags intact for the dispatch at 3
8: pcmpeqd %xmm2,%xmm2 # handle danger memory
mov %rdi,%r8
and $-16,%r8 # aligned block containing rdi, stays within the page
movaps (%r8),%xmm1
movaps %xmm1,-32(%rbp) # stack copy of the aligned block
movaps %xmm2,-16(%rbp) # followed by sixteen 0xff bytes
pcmpistri $\mode,-32(%rbp,%r9),%xmm2 # ZF reports NUL before the block ends
jz 4b # NOTE(review): returns NULL as soon as haystack ends here
jmp 7b # no NUL before the boundary, do the unaligned compare
.endm

View file

@ -1,26 +0,0 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
#include "libc/nexgen32e/pcmpstr.inc"
#include "libc/nexgen32e/strstr.inc"
// TODO(jart): Fix me.
// Substring search entry point built by expanding the .strstr macro
// with the .Lequalorder16 comparison mode. Both names come from the
// included .inc files; judging by the name the mode compares 16-bit
// units, but confirm against pcmpstr.inc.
strstr16$sse42:
.strstr .Lequalorder16
.endfn strstr16$sse42,globl,hidden

View file

@ -70,9 +70,7 @@ noasan static inline const unsigned char *memchr_sse(const unsigned char *s,
void *memchr(const void *s, int c, size_t n) {
const void *r;
if (!IsTiny() && X86_HAVE(SSE)) {
if (IsAsan()) {
__asan_verify(s, n);
}
if (IsAsan()) __asan_verify(s, n);
r = memchr_sse(s, c, n);
} else {
r = memchr_pure(s, c, n);

View file

@ -68,9 +68,7 @@ noasan static inline const unsigned char *memrchr_sse(const unsigned char *s,
void *memrchr(const void *s, int c, size_t n) {
const void *r;
if (!IsTiny() && X86_HAVE(SSE)) {
if (IsAsan()) {
__asan_verify(s, n);
}
if (IsAsan()) __asan_verify(s, n);
r = memrchr_sse(s, c, n);
} else {
r = memrchr_pure(s, c, n);

View file

@ -1,7 +1,7 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
@ -16,46 +16,61 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
#include "libc/str/internal.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
#define kVectorSize 32 /* x86+avx2 is 256-bit cpu */
typedef char16_t xmm_t __attribute__((__vector_size__(16), __aligned__(2)));
typedef uint8_t uint8_v _Vector_size(kVectorSize);
typedef uint32_t vbitmask_t;
/**
 * Scans char16_t array from the end, one element at a time.
 *
 * @param s is memory to search
 * @param c is value to look for
 * @param n is number of char16_t elements in `s`
 * @return pointer to last element equal to `c`, or NULL if absent
 */
static inline const char16_t *memrchr16_pure(const char16_t *s, char16_t c,
                                             size_t n) {
  size_t remaining = n;
  while (remaining > 0) {
    --remaining;
    if (s[remaining] == c) return &s[remaining];
  }
  return 0;
}
/**
 * Scans char16_t array from the end, eight elements per step.
 *
 * Each step loads the eight elements just below the cursor and compares
 * all of them against `c` at once. The byte mask from pmovmskb carries
 * two bits per 16-bit lane, so the index of its highest set bit divided
 * by two is the lane of the last match. Whatever is left at the front
 * of the array (fewer than eight elements) is scanned one at a time.
 *
 * @param s is memory to search
 * @param c is value to look for
 * @param n is number of char16_t elements in `s`
 * @return pointer to last element equal to `c`, or NULL if absent
 */
noasan static inline const char16_t *memrchr16_sse(const char16_t *s,
                                                   char16_t c, size_t n) {
  size_t i;
  unsigned m;
  xmm_t v, t = {c, c, c, c, c, c, c, c};
  for (i = n; i >= 8;) {
    v = *(const xmm_t *)(s + (i -= 8));
    m = __builtin_ia32_pmovmskb128(v == t);
    if (m) {
      // index of highest set bit in the byte mask
      m = __builtin_clzl(m) ^ (sizeof(long) * CHAR_BIT - 1);
      return s + i + m / 2;
    }
  }
  while (i--) {
    if (s[i] == c) {
      return s + i;
    }
  }
  return 0;
}
/**
* Returns how many bytes the utf16 string would be as utf8.
* Returns pointer to last instance of character.
*
* @param s is memory to search
* @param c is search byte which is masked with 65535
* @param n is number of char16_t elements in `s`
* @return is pointer to last instance of c or NULL if not found
* @asyncsignalsafe
*/
int strcmp_avx2(const char *s1, const char *s2) {
if (s1 == s2) return 0;
const unsigned char *p1 = (const unsigned char *)s1;
const unsigned char *p2 = (const unsigned char *)s2;
size_t i = -kVectorSize;
vLoop:
i += kVectorSize;
bLoop:
if (!IsPointerDangerous(p1 + i) && !IsPointerDangerous(p2 + i)) {
unsigned char zf;
vbitmask_t r1;
uint8_v v1, v2;
const uint8_v kZero = {0};
asm(ZFLAG_ASM("vmovdqu\t%5,%2\n\t" /* move because gcc problematic */
"vpcmpeqb\t%4,%2,%1\n\t" /* check for equality in p1 and p2 */
"vpcmpeqb\t%6,%2,%2\n\t" /* check for nul in p1 */
"vpandn\t%7,%1,%2\n\t" /* most complicated bitwise not ever */
"vpor\t%2,%1,%1\n\t" /* check for nul in p2 */
"pmovmskb\t%1,%3\n\t" /* turn 256 bits into 32 bits */
"bsf\t%3,%3") /* find stop byte */
: ZFLAG_CONSTRAINT(zf), "=x"(v1), "=x"(v2), "=r"(r1)
: "m"(*(const uint8_v *)(p1 + i)), "m"(*(const uint8_v *)(p2 + i)),
"x"(kZero), "m"(kVectorSize));
if (zf) goto vLoop;
return p1[i + r1] - p2[i + r1];
void *memrchr16(const void *s, int c, size_t n) {
const void *r;
if (!IsTiny() && X86_HAVE(SSE)) {
if (IsAsan()) __asan_verify(s, n * 2);
r = memrchr16_sse(s, c, n);
} else {
i += 1;
int c;
if (!(c = p1[i - 1] - p2[i - 1]) && p1[i - 1] + p1[i - 1] != 0) goto bLoop;
return c;
r = memrchr16_pure(s, c, n);
}
return (void *)r;
}

View file

@ -88,7 +88,6 @@ void *memmove(void *, const void *, size_t) memcpyesque;
void *memcpy(void *restrict, const void *restrict, size_t) memcpyesque;
void *mempcpy(void *restrict, const void *restrict, size_t) memcpyesque;
void *memccpy(void *restrict, const void *restrict, int, size_t) memcpyesque;
void *memeqmask(void *, const void *, const void *, size_t) memcpyesque;
void bcopy(const void *, void *, size_t) memcpyesque;
void explicit_bzero(void *, size_t);
@ -173,7 +172,6 @@ wchar_t *wcsncat(wchar_t *, const wchar_t *, size_t) memcpyesque;
char *strncpy(char *, const char *, size_t) memcpyesque;
char *strtok(char *, const char *) paramsnonnull((2)) libcesque;
char *strtok_r(char *, const char *, char **) paramsnonnull((2, 3));
uint16_t *strcpyzbw(uint16_t *, const char *) memcpyesque;
wchar_t *wcstok(wchar_t *, const wchar_t *, wchar_t **) paramsnonnull((2, 3));
char *wstrtrunc(uint16_t *) memcpyesque;
char *wstrntrunc(uint16_t *, size_t) memcpyesque;

View file

@ -20,10 +20,7 @@
#include "libc/str/str.h"
static inline noasan uint64_t UncheckedAlignedRead64(const char *p) {
return (uint64_t)(255 & p[7]) << 070 | (uint64_t)(255 & p[6]) << 060 |
(uint64_t)(255 & p[5]) << 050 | (uint64_t)(255 & p[4]) << 040 |
(uint64_t)(255 & p[3]) << 030 | (uint64_t)(255 & p[2]) << 020 |
(uint64_t)(255 & p[1]) << 010 | (uint64_t)(255 & p[0]) << 000;
return *(uint64_t *)p;
}
/**

View file

@ -18,12 +18,14 @@
*/
#include "libc/assert.h"
#include "libc/bits/bits.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/str/str.h"
static noasan size_t strnlen_x64(const char *s, size_t n, size_t i) {
uint64_t w;
for (; i + 8 < n; i += 8) {
w = READ64LE(s + i);
w = *(uint64_t *)(s + i);
if ((w = ~w & (w - 0x0101010101010101) & 0x8080808080808080)) {
i += (unsigned)__builtin_ctzll(w) >> 3;
break;
@ -40,8 +42,9 @@ static noasan size_t strnlen_x64(const char *s, size_t n, size_t i) {
* @return byte length
* @asyncsignalsafe
*/
size_t strnlen(const char *s, size_t n) {
noasan size_t strnlen(const char *s, size_t n) {
size_t i;
if (IsAsan() && n) __asan_verify(s, 1);
for (i = 0; (uintptr_t)(s + i) & 7; ++i) {
if (i == n || !s[i]) return i;
}
@ -50,5 +53,6 @@ size_t strnlen(const char *s, size_t n) {
if (i == n || !s[i]) break;
}
assert(i == n || (i < n && !s[i]));
if (IsAsan()) __asan_verify(s, i);
return i;
}

View file

@ -1,7 +1,7 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
@ -16,32 +16,46 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/nexgen32e/nexgen32e.h"
#include "libc/testlib/testlib.h"
#include "libc/assert.h"
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/str/str.h"
TEST(sidiv, smoke) {
EXPECT_EQ(13373133731337 / 10, div10int64(13373133731337));
EXPECT_EQ(13373133731337 / 100, div100int64(13373133731337));
EXPECT_EQ(13373133731337 / 1000, div1000int64(13373133731337));
EXPECT_EQ(13373133731337 / 10000, div10000int64(13373133731337));
EXPECT_EQ(13373133731337 / 1000000, div1000000int64(13373133731337));
EXPECT_EQ(13373133731337 / 1000000000, div1000000000int64(13373133731337));
/**
 * Skips forward over 8-byte words that contain no NUL byte.
 *
 * Words are read directly from `s + i` for as long as more than eight
 * bytes remain before `n`. When a word contains a zero byte, `i` is
 * advanced to that byte and the scan stops; the caller finishes the
 * remaining bytes itself.
 *
 * NOTE(review): the byte index is derived with a count-trailing-zeros,
 * which presumably relies on a little-endian target -- confirm.
 *
 * @param s is string being measured
 * @param n is max length
 * @param i is offset to resume from
 * @return offset of first NUL found, or offset where word scan stopped
 */
static noasan size_t strnlen_s_x64(const char *s, size_t n, size_t i) {
  uint64_t word, zeros;
  while (i + 8 < n) {
    word = *(uint64_t *)(s + i);
    // high bit of each byte becomes set where that byte was zero
    zeros = ~word & (word - 0x0101010101010101) & 0x8080808080808080;
    if (zeros) {
      i += (unsigned)__builtin_ctzll(zeros) >> 3;
      break;
    }
    i += 8;
  }
  return i;
}
TEST(sirem, smoke) {
EXPECT_EQ(13373133731337 % 10, rem10int64(13373133731337));
EXPECT_EQ(13373133731337 % 100, rem100int64(13373133731337));
EXPECT_EQ(13373133731337 % 1000, rem1000int64(13373133731337));
EXPECT_EQ(13373133731337 % 10000, rem10000int64(13373133731337));
EXPECT_EQ(13373133731337 % 1000000, rem1000000int64(13373133731337));
EXPECT_EQ(13373133731337 % 1000000000, rem1000000000int64(13373133731337));
/**
* Returns length of NUL-terminated string... securely.
*
* This is like strnlen() except it'll return 0 if `s` is null. We also
* make the assumption for the purposes of ASAN that `n` is the size of
* the buffer if `s` is non-null.
*
* @param s is string
* @param n is max length
* @return byte length
* @asyncsignalsafe
*/
noasan size_t strnlen_s(const char *s, size_t n) {
size_t i;
if (!s) return 0;
if (IsAsan()) __asan_verify(s, n);
for (i = 0; (uintptr_t)(s + i) & 7; ++i) {
if (i == n || !s[i]) return i;
}
TEST(rem, euclid) {
ASSERT_EQ(-2, rem10int64(-12));
ASSERT_EQ(-1, rem10int64(-1));
ASSERT_EQ(0, rem10int64(0));
ASSERT_EQ(1, rem10int64(1));
ASSERT_EQ(9, rem10int64(9));
ASSERT_EQ(1, rem10int64(11));
i = strnlen_s_x64(s, n, i);
for (;; ++i) {
if (i == n || !s[i]) break;
}
assert(i == n || (i < n && !s[i]));
return i;
}

76
libc/str/wmemrchr.c Normal file
View file

@ -0,0 +1,76 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/str/str.h"
typedef wchar_t xmm_t __attribute__((__vector_size__(16), __aligned__(4)));
/**
 * Scans wchar_t array from the end, one element at a time.
 *
 * @param s is memory to search
 * @param c is search word
 * @param n is number of wchar_t elements in `s`
 * @return pointer to last element equal to `c`, or NULL if absent
 */
static inline const wchar_t *wmemrchr_pure(const wchar_t *s, wchar_t c,
                                           size_t n) {
  size_t remaining = n;
  while (remaining > 0) {
    --remaining;
    if (s[remaining] == c) return &s[remaining];
  }
  return 0;
}
/**
 * Scans wchar_t array from the end, four elements per step.
 *
 * Each step loads the four elements just below the cursor and compares
 * all of them against `c` at once. The byte mask from pmovmskb carries
 * four bits per 32-bit lane, so the index of its highest set bit
 * divided by four is the lane of the last match. Whatever is left at
 * the front of the array (fewer than four elements) is scanned one at
 * a time.
 *
 * @param s is memory to search
 * @param c is search word
 * @param n is number of wchar_t elements in `s`
 * @return pointer to last element equal to `c`, or NULL if absent
 */
noasan static inline const wchar_t *wmemrchr_sse(const wchar_t *s, wchar_t c,
                                                 size_t n) {
  size_t i;
  unsigned m;
  xmm_t v, t = {c, c, c, c};
  for (i = n; i >= 4;) {
    v = *(const xmm_t *)(s + (i -= 4));
    m = __builtin_ia32_pmovmskb128(v == t);
    if (m) {
      // index of highest set bit in the byte mask
      m = __builtin_clzl(m) ^ (sizeof(long) * CHAR_BIT - 1);
      return s + i + m / 4;
    }
  }
  while (i--) {
    if (s[i] == c) {
      return s + i;
    }
  }
  return 0;
}
/**
* Returns pointer to last instance of character.
*
* The array is searched backwards from its end. The vectorized helper
* is used unless this is a tiny build or the CPU feature check fails,
* in which case the element-by-element helper is used instead.
*
* @param s is memory to search
* @param c is search word
* @param n is number of wchar_t elements in `s`
* @return is pointer to last instance of c or NULL if not found
* @asyncsignalsafe
*/
void *wmemrchr(const void *s, wchar_t c, size_t n) {
const void *r;
if (!IsTiny() && X86_HAVE(SSE)) {
// validate the whole range up front; n * 4 is the byte count handed
// to the checker for n wchar_t elements
if (IsAsan()) __asan_verify(s, n * 4);
r = wmemrchr_sse(s, c, n);
} else {
r = wmemrchr_pure(s, c, n);
}
return (void *)r;
}

View file

@ -29,6 +29,7 @@
#include "libc/sysv/consts/clone.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/internal.h"
#include "libc/thread/thread.h"
STATIC_YOINK("_main_thread_ctor");
@ -77,6 +78,7 @@ static int cthread_start(void *arg) {
exitcode = (void *)rc.dx;
}
td->exitcode = exitcode;
_pthread_key_destruct(td->key);
if (atomic_load(&td->state) & cthread_detached) {
// we're still using the stack
// thus we can't munmap it yet

View file

@ -54,7 +54,6 @@ int cthread_join(cthread_t td, void **exitcode) {
} else {
if (~atomic_fetch_add(&td->state, cthread_joining) & cthread_finished) {
while ((x = atomic_load(&td->tid))) {
// FUTEX_WAIT_PRIVATE makes it hang
cthread_memory_wait32(&td->tid, x, 0);
}
}

View file

@ -30,7 +30,7 @@ StartOver:
x = _pthread_key_usage[i];
while (x) {
j = bsrl(x);
if ((dtor = _pthread_key_dtor[i * 64 + j]) && (value = key[i * 64 + j])) {
if ((value = key[i * 64 + j]) && (dtor = _pthread_key_dtor[i * 64 + j])) {
key[i * 64 + j] = 0;
dtor(value);
goto StartOver;

View file

@ -18,53 +18,25 @@
*/
#include "libc/bits/atomic.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/timespec.h"
#include "libc/dce.h"
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/consts/futex.h"
#include "libc/thread/freebsd.internal.h"
#include "libc/errno.h"
#include "libc/intrin/futex.internal.h"
#include "libc/thread/thread.h"
int cthread_memory_wait32(int* addr, int val, const struct timespec* timeout) {
size_t size;
struct _umtx_time *put, ut;
if (IsLinux() || IsOpenbsd()) {
return sys_futex(addr, FUTEX_WAIT, val, timeout, 0);
#if 0
} else if (IsFreebsd()) {
if (!timeout) {
put = 0;
size = 0;
return _futex_wait(addr, val, timeout);
} else {
ut._flags = 0;
ut._clockid = CLOCK_REALTIME;
ut._timeout = *timeout;
put = &ut;
size = sizeof(ut);
}
return _umtx_op(addr, UMTX_OP_MUTEX_WAIT, 0, &size, put);
#endif
} else {
unsigned tries;
for (tries = 1; atomic_load(addr) == val; ++tries) {
if (tries & 7) {
__builtin_ia32_pause();
} else {
sched_yield();
}
}
return 0;
return sched_yield();
}
}
int cthread_memory_wake32(int* addr, int n) {
if (IsLinux() || IsOpenbsd()) {
return sys_futex(addr, FUTEX_WAKE, n, 0, 0);
#if 0
} else if (IsFreebsd()) {
return _umtx_op(addr, UMTX_OP_MUTEX_WAKE, n, 0, 0);
#endif
return _futex_wake(addr, n);
} else {
return 0;
}
return -1;
}

View file

@ -41,6 +41,7 @@ void cthread_zombies_add(cthread_t td) {
void cthread_zombies_reap(void) {
struct Zombie *z;
// TODO(jart): Is this right? Update to not use malloc/free?
while ((z = atomic_load(&cthread_zombies)) && !atomic_load(&z->td->tid)) {
if (atomic_compare_exchange_weak(&cthread_zombies, &z, z->next)) {
munmap(z->td->alloc.bottom, z->td->alloc.top - z->td->alloc.bottom);

View file

@ -28,7 +28,7 @@ long double dsleep(long double secs) {
struct timespec dur, rem;
dur.tv_sec = secs;
dur.tv_nsec = secs * 1e9;
dur.tv_nsec = rem1000000000int64(dur.tv_nsec);
dur.tv_nsec = dur.tv_nsec % 1000000000;
if (secs > 1e-6) {
nanosleep(&dur, &rem);
secs = rem.tv_nsec;

View file

@ -33,8 +33,23 @@
#include "libc/testlib/testlib.h"
#include "tool/net/sandbox.h"
/**
 * Probes whether seccomp strict mode can actually be enabled.
 *
 * The probe runs in a forked child because it really attempts
 * seccomp(SECCOMP_SET_MODE_STRICT); the child exits with status 0
 * when that call succeeds and with status 1 when it fails.
 *
 * It's been reported that Chromebooks return EINVAL here.
 *
 * @return true if the child was reaped and exited normally with 0
 */
bool CanUseSeccomp(void) {
  int ws, pid, rc;
  ASSERT_NE(-1, (pid = fork()));
  if (!pid) {
    if (seccomp(SECCOMP_SET_MODE_STRICT, 0, 0) != -1) {
      _Exit1(0);
    } else {
      _Exit1(1);
    }
  }
  rc = wait(&ws);
  EXPECT_NE(-1, rc);
  // ws is only written when wait() succeeds, so don't inspect it otherwise
  return rc != -1 && WIFEXITED(ws) && !WEXITSTATUS(ws);
}
void SetUp(void) {
if (!__is_linux_2_6_23()) {
if (!__is_linux_2_6_23() || !CanUseSeccomp()) {
exit(0);
}
}

View file

@ -131,7 +131,7 @@ TEST(pthread_mutex_lock, contention) {
for (i = 0; i < THREADS; ++i) {
munmap(stack[i], GetStackSize());
}
pthread_mutex_destroy(&lock);
EXPECT_EQ(0, pthread_mutex_destroy(&lock));
}
TEST(pthread_mutex_lock, rcontention) {
@ -159,7 +159,7 @@ TEST(pthread_mutex_lock, rcontention) {
for (i = 0; i < THREADS; ++i) {
munmap(stack[i], GetStackSize());
}
pthread_mutex_destroy(&lock);
EXPECT_EQ(0, pthread_mutex_destroy(&lock));
}
TEST(pthread_mutex_lock, econtention) {
@ -187,7 +187,7 @@ TEST(pthread_mutex_lock, econtention) {
for (i = 0; i < THREADS; ++i) {
munmap(stack[i], GetStackSize());
}
pthread_mutex_destroy(&lock);
EXPECT_EQ(0, pthread_mutex_destroy(&lock));
}
int SpinlockWorker(void *p) {

View file

@ -106,9 +106,9 @@ TEST(strnlen, nulNotFound_ReturnsSize) {
}
}
TEST(strnlen_s, nulNotFound_ReturnsZero) {
TEST(strnlen_s, nulNotFound) {
char buf[3] = {1, 2, 3};
ASSERT_EQ(0, strnlen_s(buf, 3));
ASSERT_EQ(3, strnlen_s(buf, 3));
}
TEST(strlen, fuzz) {

View file

@ -1,101 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/bits.h"
#include "libc/runtime/buffer.h"
#include "libc/runtime/gc.internal.h"
#include "libc/stdio/stdio.h"
#include "libc/str/str.h"
#include "libc/testlib/testlib.h"
#include "libc/x/x.h"
#define ALIGN 128
#define BUFSIZE (8 * 32)
#define MASKSIZE (BUFSIZE / CHAR_BIT)
// Fixture for the memeqmask test: kX and kY are 8 rows of 32 bytes each
// (256 bytes, matching BUFSIZE), and kM holds one character per byte
// position: '1' where kX and kY agree, '0' where they differ.
const char kX[] = "aaaaaaaaeeeeeeeeeeeeeeeeeeeeeeee"
                  "e                              e"
                  "e                              e"
                  "e                              e"
                  "e                              e"
                  "e                              e"
                  "e                              e"
                  "eeeeeeeeeeeeeeeeeeeeeeeeeeeeee-e";
const char kY[] = "aaaaaaaaefffffffffffeffffffffff-"
                  "f                             z-"
                  "f                              f"
                  "f                              f"
                  "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
                  "f                              f"
                  "f                              f"
                  "ffffffffffffffffffffffffffffff-f";
const char kM[] = "11111111100000000000100000000000"
                  "01111111111111111111111111111100"
                  "01111111111111111111111111111110"
                  "01111111111111111111111111111110"
                  "00000000000000000000000000000000"
                  "01111111111111111111111111111110"
                  "01111111111111111111111111111110"
                  "00000000000000000000000000000010";
/**
 * Renders a byte buffer as a NUL-terminated string of '0' / '1' digits.
 *
 * Each input byte contributes CHAR_BIT characters, least significant
 * bit first.
 *
 * @param data is the bytes to render
 * @param size is the number of bytes readable at data
 * @return string of size * CHAR_BIT digits obtained from xmalloc()
 */
dontdiscard char *binify(uint8_t *data, size_t size) {
  size_t byte, bit, n = 0;
  char *out = xmalloc(size * CHAR_BIT + 1);
  for (byte = 0; byte < size; ++byte) {
    for (bit = 0; bit < CHAR_BIT; ++bit) {
      out[n++] = "01"[(data[byte] >> bit) & 1];
    }
  }
  out[n] = '\0';
  return out;
}
// Checks memeqmask() against the kX / kY / kM fixture: the two inputs are
// copied into buffers obtained from balloc(), memeqmask() is expected to
// return the mask pointer it was given, and the mask rendered by binify()
// must match kM.
TEST(memeqmask, test) {
struct GuardedBuffer x = {}, y = {}, m = {};
memcpy(balloc(&x, ALIGN, BUFSIZE), kX, BUFSIZE);
memcpy(balloc(&y, ALIGN, BUFSIZE), kY, BUFSIZE);
balloc(&m, ALIGN, MASKSIZE);
EXPECT_EQ((intptr_t)m.p, (intptr_t)memeqmask(m.p, x.p, y.p, BUFSIZE));
EXPECT_STREQ(kM, gc(binify(m.p, MASKSIZE)));
bfree(&m);
bfree(&x);
bfree(&y);
}
#if 0
#include "libc/rand/rand.h"
#include "libc/testlib/ezbench.h"
// Benchmark for memeqmask() over two 64 KiB inputs; compiled out by the
// surrounding #if 0. The first EZBENCH argument refills x and y through
// rngset(); the second is the memeqmask() call.
// NOTE(review): presumably EZBENCH treats the first block as untimed setup
// and times only the second expression; confirm against ezbench.h.
TEST(memeqmask, bench) {
size_t len = 64 * 1024;
char *m = xmemalign(64, DIMMASK(len));
char *x = xmemalign(64, len);
char *y = xmemalign(64, len);
EZBENCH(
{
rngset(x, len, rand64, -1);
rngset(y, len, rand64, -1);
},
memeqmask(m, x, y, len));
}
#endif

View file

@ -19,6 +19,7 @@
#include "libc/nexgen32e/nexgen32e.h"
#include "libc/str/str.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/hyperion.h"
#include "libc/testlib/testlib.h"
TEST(memrchr16, test) {
@ -31,4 +32,6 @@ TEST(memrchr16, test) {
// Benchmarks memrchr16() searching for '.' in a short char16_t literal and
// in the kHyperion corpus.
// NOTE(review): kHyperionSize / 2 presumably converts a byte count into
// 16-bit units; confirm against hyperion.h.
BENCH(memrchr16, bench) {
EZBENCH2("memrchr16", donothing,
EXPROPRIATE(memrchr16(u"yo.hi.there", '.', 11)));
EZBENCH2("memrchr16 hyperion", donothing,
EXPROPRIATE(memrchr16(kHyperion, '.', kHyperionSize / 2)));
}

View file

View file

@ -1,7 +1,7 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
@ -16,40 +16,21 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/internal.h"
#include "libc/str/str.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/hyperion.h"
#include "libc/testlib/testlib.h"
unsigned cescapec(int c) {
unsigned char ch = c;
switch (ch) {
case '\a':
return '\\' | 'a' << 8;
case '\b':
return '\\' | 'b' << 8;
case '\v':
return '\\' | 'v' << 8;
case '\f':
return '\\' | 'f' << 8;
case '\?':
return '\\' | '?' << 8;
case '\n':
return '\\' | 'n' << 8;
case '\r':
return '\\' | 'r' << 8;
case '\t':
return '\\' | 't' << 8;
case '\"':
return '\\' | '"' << 8;
case '\'':
return '\\' | '\'' << 8;
case '\\':
return '\\' | '\\' << 8;
default: {
if (ch >= 0x80 || !isprint(ch)) {
return '\\' | (ch / 64 + '0') << 8 | (ch % 64 / 8 + '0') << 16 |
(ch % 8 + '0') << 24;
} else {
return ch;
}
}
// Checks wmemrchr() on wide string literals: a character that does not
// occur yields NULL, and otherwise the result points at the last
// occurrence within the first n wide characters.
TEST(wmemrchr, test) {
EXPECT_EQ(NULL, wmemrchr(L"yo.hi.thereeuhcruhrceeuhcre", '-', 27));
EXPECT_STREQ(L".there", wmemrchr(L"yo.hi.there", '.', 11));
EXPECT_STREQ(L".thereeuhcruhrceeuhcre",
wmemrchr(L"yo.hi.thereeuhcruhrceeuhcre", '.', 27));
}
// Benchmarks wmemrchr() searching for '.' in a short wide literal and in
// the kHyperion corpus.
// NOTE(review): kHyperionSize / 4 presumably converts a byte count into
// 4-byte wchar_t units; confirm against hyperion.h.
BENCH(wmemrchr, bench) {
EZBENCH2("wmemrchr", donothing,
EXPROPRIATE(wmemrchr(L"yo.hi.there", '.', 11)));
EZBENCH2("wmemrchr hyperion", donothing,
EXPROPRIATE(wmemrchr(kHyperion, '.', kHyperionSize / 4)));
}

View file

@ -723,12 +723,14 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height,
quality = quality < 50 ? 5000 / quality : 200 - quality * 2;
for (i = 0; i < 64; ++i) {
int uvti, yti = div100int64((YQT[i] * quality + 50));
YTable[stbiw__jpg_ZigZag[i]] =
(unsigned char)(yti < 1 ? 1 : yti > 255 ? 255 : yti);
uvti = div100int64(UVQT[i] * quality + 50);
UVTable[stbiw__jpg_ZigZag[i]] =
(unsigned char)(uvti < 1 ? 1 : uvti > 255 ? 255 : uvti);
int uvti, yti = (YQT[i] * quality + 50) / 100;
YTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(yti < 1 ? 1
: yti > 255 ? 255
: yti);
uvti = (UVQT[i] * quality + 50) / 100;
UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(uvti < 1 ? 1
: uvti > 255 ? 255
: uvti);
}
for (row = 0, k = 0; row < 8; ++row) {