mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 03:27:39 +00:00
Perform some code cleanup
This commit is contained in:
parent
0dd9629562
commit
a4601a24d3
63 changed files with 350 additions and 1643 deletions
|
@ -24,7 +24,7 @@
|
|||
|
||||
int sys_nanosleep_xnu(const struct timespec *req, struct timespec *rem) {
  // Sleeps by calling sys_select() with no file descriptors, using the
  // requested duration as the select timeout. `rem` is never written by
  // this implementation.
  long micros;
  // struct timeval carries microseconds, so scale the nanosecond field
  // down; the stale div1000int64() assignment that used to precede this
  // line was dead (immediately overwritten) and has been dropped.
  micros = req->tv_nsec / 1000;
  // never hand select() a zero sub-second field: a request shorter than
  // one microsecond is rounded up to one microsecond
  micros = MAX(1, micros);
  return sys_select(0, 0, 0, 0, &(struct timeval){req->tv_sec, micros});
}
|
||||
|
|
|
@ -41,19 +41,19 @@ int sys_utimensat_xnu(int dirfd, const char *path, const struct timespec ts[2],
|
|||
tv[0] = now;
|
||||
} else if (ts[0].tv_nsec == UTIME_OMIT) {
|
||||
tv[0].tv_sec = st.st_atim.tv_sec;
|
||||
tv[0].tv_usec = div1000int64(st.st_atim.tv_nsec);
|
||||
tv[0].tv_usec = st.st_atim.tv_nsec / 1000;
|
||||
} else {
|
||||
tv[0].tv_sec = ts[0].tv_sec;
|
||||
tv[0].tv_usec = div1000int64(ts[0].tv_nsec);
|
||||
tv[0].tv_usec = ts[0].tv_nsec / 1000;
|
||||
}
|
||||
if (ts[1].tv_nsec == UTIME_NOW) {
|
||||
tv[1] = now;
|
||||
} else if (ts[1].tv_nsec == UTIME_OMIT) {
|
||||
tv[1].tv_sec = st.st_mtim.tv_sec;
|
||||
tv[1].tv_usec = div1000int64(st.st_mtim.tv_nsec);
|
||||
tv[1].tv_usec = st.st_mtim.tv_nsec / 1000;
|
||||
} else {
|
||||
tv[1].tv_sec = ts[1].tv_sec;
|
||||
tv[1].tv_usec = div1000int64(ts[1].tv_nsec);
|
||||
tv[1].tv_usec = ts[1].tv_nsec / 1000;
|
||||
}
|
||||
} else {
|
||||
tv[0] = now;
|
||||
|
|
12
libc/intrin/futex.internal.h
Normal file
12
libc/intrin/futex.internal.h
Normal file
|
@ -0,0 +1,12 @@
|
|||
/* Internal declarations for the futex wait/wake wrappers. */
#ifndef COSMOPOLITAN_LIBC_INTRIN_FUTEX_INTERNAL_H_
|
||||
#define COSMOPOLITAN_LIBC_INTRIN_FUTEX_INTERNAL_H_
|
||||
#include "libc/calls/struct/timespec.h"
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/* Arguments, in order: address of the futex word, the value that word is
   expected to hold, and a timeout. Returns an int result code. */
int _futex_wait(void *, int, struct timespec *);
|
||||
/* Arguments, in order: address of the futex word and a count.
   Returns an int result code. */
int _futex_wake(void *, int);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_INTRIN_FUTEX_INTERNAL_H_ */
|
|
@ -1,7 +1,7 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
|
@ -16,14 +16,29 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/bits/asmflag.h"
|
||||
#include "libc/calls/strace.internal.h"
|
||||
#include "libc/calls/struct/timespec.h"
|
||||
#include "libc/intrin/describeflags.internal.h"
|
||||
#include "libc/intrin/futex.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/futex.h"
|
||||
#include "libc/sysv/consts/nr.h"
|
||||
|
||||
// Divides 64-bit signed integer by 1,000.
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @return quotient
|
||||
div1000int64:
|
||||
mov $0x7,%cl
|
||||
movabs $0x20c49ba5e353f7cf,%rdx
|
||||
jmp tinydivsi
|
||||
.endfn div1000int64,globl
|
||||
/**
 * Issues the futex system call with the FUTEX_WAIT operation.
 *
 * The system call is made directly with inline assembly and the outcome
 * is logged through STRACE.
 *
 * @param addr is passed to the kernel in %rdi
 * @param expect is passed to the kernel in %edx
 * @param timeout is copied into %r10 before the syscall instruction
 * @return value left in %eax by the system call, negated when the
 *     carry flag was reported as set
 */
privileged int _futex_wait(void *addr, int expect, struct timespec *timeout) {
|
||||
int ax;
|
||||
bool cf;
|
||||
// scratch space for the textual form of `timeout` in the trace line
char buf[45];
|
||||
// %6 is expected to be `timeout`, assuming CFLAG_CONSTRAINT(cf)
// contributes exactly one output operand -- TODO confirm in asmflag.h
asm volatile(CFLAG_ASM("mov\t%6,%%r10\n\t"
|
||||
// carry is cleared up front so a set flag afterwards can only come
// from the syscall itself
"clc\n\t"
|
||||
"syscall")
|
||||
: CFLAG_CONSTRAINT(cf), "=a"(ax)
|
||||
// "1" ties the syscall number to the same register as `ax`
: "1"(__NR_futex), "D"(addr), "S"(FUTEX_WAIT), "d"(expect),
|
||||
"g"(timeout)
|
||||
: "rcx", "r10", "r11", "memory");
|
||||
// NOTE(review): presumably some kernels report failure as carry set
// plus a positive errno; negating makes the result negative -- confirm
if (cf) ax = -ax;
|
||||
STRACE("futex(%p, FUTEX_WAIT, %d, %s) → %s", addr, expect,
|
||||
DescribeTimespec(buf, sizeof(buf), 0, timeout),
|
||||
// a nonzero result is rendered via strerrno() of its negation
ax ? strerrno(-ax) : "0");
|
||||
return ax;
|
||||
}
|
|
@ -1,7 +1,7 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
|
@ -16,39 +16,34 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/asmflag.h"
|
||||
#include "libc/calls/strace.internal.h"
|
||||
#include "libc/fmt/itoa.h"
|
||||
#include "libc/intrin/futex.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/futex.h"
|
||||
#include "libc/sysv/consts/nr.h"
|
||||
|
||||
#define N 32
|
||||
typedef uint8_t uint8_v _Vector_size(N);
|
||||
|
||||
/**
|
||||
* Searches for last instance of character in memory region.
|
||||
*
|
||||
* @param s is binary data to search
|
||||
* @param c is treated as unsigned char
|
||||
* @param n is byte length of s
|
||||
* @return address of last c in s, or NULL if not found
|
||||
*/
|
||||
void *memrchr(const void *s, int c, size_t n) {
|
||||
unsigned char ch = (unsigned char)c;
|
||||
const unsigned char *p = (const unsigned char *)s;
|
||||
if (n >= 32 && CheckAvx2()) {
|
||||
uint8_v cv;
|
||||
__builtin_memset(&cv, ch, sizeof(cv));
|
||||
do {
|
||||
uint32_t skip;
|
||||
uint8_v sv, tv;
|
||||
memcpy(&sv, s + n - N, N);
|
||||
asm("vpcmpeqb\t%2,%3,%1\n\t"
|
||||
"vpmovmskb\t%1,%0\n\t"
|
||||
"lzcnt\t%0,%0"
|
||||
: "=r"(skip), "=x"(tv)
|
||||
: "x"(sv), "x"(cv));
|
||||
n -= skip;
|
||||
if (skip != 32) break;
|
||||
} while (n >= 32);
|
||||
// Renders a futex wake result for tracing: a negative value is turned
// into its strerrno() name, anything else is written to buf in decimal.
static const char *FormatFutexWakeResult(char buf[12], int ax) {
  if (ax < 0) return strerrno(-ax);
  FormatInt32(buf, ax);
  return buf;
}
|
||||
while (n--) {
|
||||
if (p[n] == ch) return (/* unconst */ void *)&p[n];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * Issues the futex system call with the FUTEX_WAKE operation.
 *
 * The system call is made directly with inline assembly and the outcome
 * is logged through STRACE.
 *
 * @param addr is passed to the kernel in %rdi
 * @param count is passed to the kernel in %edx
 * @return value left in %eax by the system call, negated when the
 *     carry flag was reported as set
 */
privileged int _futex_wake(void *addr, int count) {
|
||||
int ax;
|
||||
bool cf;
|
||||
// scratch space handed to FormatFutexWakeResult() for the trace line
char buf[12];
|
||||
// carry is cleared up front so a set flag afterwards can only come
// from the syscall itself
asm volatile(CFLAG_ASM("clc\n\t"
|
||||
"syscall")
|
||||
: CFLAG_CONSTRAINT(cf), "=a"(ax)
|
||||
// "1" ties the syscall number to the same register as `ax`
: "1"(__NR_futex), "D"(addr), "S"(FUTEX_WAKE), "d"(count)
|
||||
: "rcx", "r11", "memory");
|
||||
// NOTE(review): presumably some kernels report failure as carry set
// plus a positive errno; negating makes the result negative -- confirm
if (cf) ax = -ax;
|
||||
STRACE("futex(%p, FUTEX_WAKE, %d) → %s", addr, count,
|
||||
FormatFutexWakeResult(buf, ax));
|
||||
return ax;
|
||||
}
|
|
@ -73,6 +73,8 @@ o/$(MODE)/libc/intrin/kprintf.greg.o: \
|
|||
-fno-stack-protector
|
||||
|
||||
# synchronization primitives are intended to be magic free
|
||||
o/$(MODE)/libc/intrin/futex_wait.o \
|
||||
o/$(MODE)/libc/intrin/futex_wake.o \
|
||||
o/$(MODE)/libc/intrin/gettid.greg.o \
|
||||
o/$(MODE)/libc/intrin/pthread_mutex_lock.o \
|
||||
o/$(MODE)/libc/intrin/pthread_mutex_unlock.o \
|
||||
|
|
|
@ -126,6 +126,7 @@ void *pthread_getspecific(pthread_key_t);
|
|||
!atomic_exchange(&(mutex)->lock, 1)) \
|
||||
? 0 \
|
||||
: pthread_mutex_lock(mutex))
|
||||
/*
|
||||
#define pthread_mutex_unlock(mutex) \
|
||||
((mutex)->attr == PTHREAD_MUTEX_NORMAL \
|
||||
? (atomic_store_explicit(&(mutex)->lock, 0, memory_order_relaxed), \
|
||||
|
@ -134,6 +135,7 @@ void *pthread_getspecific(pthread_key_t);
|
|||
_pthread_mutex_wake(mutex)), \
|
||||
0) \
|
||||
: pthread_mutex_unlock(mutex))
|
||||
*/
|
||||
#endif
|
||||
|
||||
int _pthread_mutex_wake(pthread_mutex_t *) hidden;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/pthread.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
|
@ -24,6 +25,12 @@
|
|||
* @return 0 on success, or error number on failure
|
||||
*/
|
||||
int pthread_mutex_destroy(pthread_mutex_t *mutex) {
|
||||
int rc;
|
||||
if (!mutex->lock && !mutex->waits) {
|
||||
rc = 0;
|
||||
} else {
|
||||
rc = EDEADLK;
|
||||
}
|
||||
bzero(mutex, sizeof(*mutex));
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "libc/calls/calls.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/futex.internal.h"
|
||||
#include "libc/intrin/pthread.h"
|
||||
#include "libc/intrin/spinlock.h"
|
||||
#include "libc/linux/futex.h"
|
||||
|
@ -28,38 +29,15 @@
|
|||
#include "libc/sysv/consts/futex.h"
|
||||
#include "libc/sysv/consts/nr.h"
|
||||
|
||||
static inline int FutexWait(void *addr, int expect, struct timespec *timeout) {
|
||||
int ax;
|
||||
bool cf;
|
||||
asm volatile(CFLAG_ASM("mov\t%6,%%r10\n\t"
|
||||
"clc\n\t"
|
||||
"syscall")
|
||||
: CFLAG_CONSTRAINT(cf), "=a"(ax)
|
||||
: "1"(__NR_futex), "D"(addr), "S"(FUTEX_WAIT), "d"(expect),
|
||||
"g"(timeout)
|
||||
: "rcx", "r10", "r11", "memory");
|
||||
if (cf) ax = -ax;
|
||||
return ax;
|
||||
}
|
||||
|
||||
static int pthread_mutex_lock_spin(pthread_mutex_t *mutex, int tries) {
|
||||
volatile int i;
|
||||
struct timespec ts;
|
||||
if (tries < 7) {
|
||||
for (i = 0; i != 1 << tries; i++) {
|
||||
}
|
||||
tries++;
|
||||
} else if (IsLinux() || IsOpenbsd()) {
|
||||
atomic_fetch_add(&mutex->waits, 1);
|
||||
if (tries < 28) {
|
||||
ts.tv_sec = 0;
|
||||
ts.tv_nsec = 4 << tries;
|
||||
tries++;
|
||||
} else {
|
||||
ts.tv_sec = 1;
|
||||
ts.tv_nsec = 0;
|
||||
}
|
||||
FutexWait(&mutex->lock, 1, &ts);
|
||||
_futex_wait(&mutex->lock, 1, &(struct timespec){1});
|
||||
atomic_fetch_sub(&mutex->waits, 1);
|
||||
} else {
|
||||
sched_yield();
|
||||
|
|
|
@ -41,7 +41,7 @@ int(pthread_mutex_unlock)(pthread_mutex_t *mutex) {
|
|||
case PTHREAD_MUTEX_NORMAL:
|
||||
atomic_store_explicit(&mutex->lock, 0, memory_order_relaxed);
|
||||
if ((IsLinux() || IsOpenbsd()) &&
|
||||
atomic_load_explicit(&mutex->waits, memory_order_relaxed)) {
|
||||
atomic_load_explicit(&mutex->waits, memory_order_relaxed) > 0) {
|
||||
_pthread_mutex_wake(mutex);
|
||||
}
|
||||
return 0;
|
||||
|
|
|
@ -16,23 +16,9 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/asmflag.h"
|
||||
#include "libc/intrin/futex.internal.h"
|
||||
#include "libc/intrin/pthread.h"
|
||||
#include "libc/sysv/consts/futex.h"
|
||||
#include "libc/sysv/consts/nr.h"
|
||||
|
||||
static inline int FutexWake(void *addr, int count) {
|
||||
int ax;
|
||||
bool cf;
|
||||
asm volatile(CFLAG_ASM("clc\n\t"
|
||||
"syscall")
|
||||
: CFLAG_CONSTRAINT(cf), "=a"(ax)
|
||||
: "1"(__NR_futex), "D"(addr), "S"(FUTEX_WAKE), "d"(count)
|
||||
: "rcx", "r11", "memory");
|
||||
if (cf) ax = -ax;
|
||||
return ax;
|
||||
}
|
||||
|
||||
int _pthread_mutex_wake(pthread_mutex_t *mutex) {
|
||||
return FutexWake(&mutex->lock, 1);
|
||||
return _futex_wake(&mutex->lock, 1);
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ sched_yield:
|
|||
testb IsXnu()
|
||||
jz 1f
|
||||
pause
|
||||
xor %eax,%eax
|
||||
ret
|
||||
#endif
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "libc/bits/atomic.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/futex.internal.h"
|
||||
#include "libc/intrin/wait0.internal.h"
|
||||
#include "libc/linux/futex.h"
|
||||
|
||||
|
@ -34,8 +35,8 @@ void _wait0(int *ptid) {
|
|||
for (;;) {
|
||||
if (!(x = atomic_load_explicit(ptid, memory_order_relaxed))) {
|
||||
break;
|
||||
} else if (IsLinux()) {
|
||||
LinuxFutexWait(ptid, x, 0);
|
||||
} else if (IsLinux() || IsOpenbsd()) {
|
||||
_futex_wait(ptid, x, &(struct timespec){2});
|
||||
} else {
|
||||
sched_yield();
|
||||
}
|
||||
|
|
|
@ -111,9 +111,9 @@ void(vflogf)(unsigned level, const char *file, int line, FILE *f,
|
|||
if (bufmode == _IOLBF) f->bufmode = _IOFBF;
|
||||
|
||||
if ((fprintf_unlocked)(f, "%r%c%s%06ld:%s:%d:%.*s:%d] ",
|
||||
"FEWIVDNT"[level & 7], buf32,
|
||||
rem1000000int64(div1000int64(dots)), file, line,
|
||||
strchrnul(prog, '.') - prog, prog, getpid()) <= 0) {
|
||||
"FEWIVDNT"[level & 7], buf32, dots / 1000 % 1000000,
|
||||
file, line, strchrnul(prog, '.') - prog, prog,
|
||||
getpid()) <= 0) {
|
||||
vflogf_onfail(f);
|
||||
}
|
||||
(vfprintf_unlocked)(f, fmt, va);
|
||||
|
|
|
@ -3,20 +3,6 @@
|
|||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/*
|
||||
* BIT SCANNING 101
|
||||
* ctz(𝑥) 31^clz(𝑥) clz(𝑥)
|
||||
* uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
* 0x00000000 wut 32 0 wut 32
|
||||
* 0x00000001 0 0 1 0 31
|
||||
* 0x80000001 0 0 1 31 0
|
||||
* 0x80000000 31 31 32 31 0
|
||||
* 0x00000010 4 4 5 4 27
|
||||
* 0x08000010 4 4 5 27 4
|
||||
* 0x08000000 27 27 28 27 4
|
||||
* 0xffffffff 0 0 1 31 0
|
||||
*/
|
||||
|
||||
int bsf(int) pureconst;
|
||||
int bsfl(long) pureconst;
|
||||
int bsfll(long long) pureconst;
|
||||
|
|
|
@ -3,20 +3,6 @@
|
|||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/*
|
||||
* BIT SCANNING 101
|
||||
* ctz(𝑥) 31^clz(𝑥) clz(𝑥)
|
||||
* uint32 𝑥 bsf(𝑥) tzcnt(𝑥) ffs(𝑥) bsr(𝑥) lzcnt(𝑥)
|
||||
* 0x00000000 wut 32 0 wut 32
|
||||
* 0x00000001 0 0 1 0 31
|
||||
* 0x80000001 0 0 1 31 0
|
||||
* 0x80000000 31 31 32 31 0
|
||||
* 0x00000010 4 4 5 4 27
|
||||
* 0x08000010 4 4 5 27 4
|
||||
* 0x08000000 27 27 28 27 4
|
||||
* 0xffffffff 0 0 1 31 0
|
||||
*/
|
||||
|
||||
int bsr(int) pureconst;
|
||||
int bsrl(long) pureconst;
|
||||
int bsrll(long long) pureconst;
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_NEXGEN32E_CACHESIZE_H_
|
||||
#define COSMOPOLITAN_LIBC_NEXGEN32E_CACHESIZE_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define kCpuCacheTypeData 1
|
||||
#define kCpuCacheTypeInstruction 2
|
||||
#define kCpuCacheTypeUnified 3
|
||||
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
unsigned getcachesize(int, int);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
|
|
|
@ -1,30 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Divides 64-bit signed integer by 1,000,000,000.
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @return quotient
|
||||
div1000000000int64:
|
||||
mov $0x1a,%cl
|
||||
movabs $0x112e0be826d694b3,%rdx
|
||||
jmp tinydivsi
|
||||
.globl tinydivsi
|
||||
.endfn div1000000000int64,globl
|
|
@ -1,29 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Divides 64-bit signed integer by 1,000,000.
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @return quotient
|
||||
div1000000int64:
|
||||
mov $0x12,%cl
|
||||
movabs $0x431bde82d7b634db,%rdx
|
||||
jmp tinydivsi
|
||||
.endfn div1000000int64,globl
|
|
@ -1,29 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Divides 64-bit signed integer by 10,000.
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @return truncated quotient
|
||||
div10000int64:
|
||||
mov $11,%cl
|
||||
movabs $0x346dc5d63886594b,%rdx
|
||||
jmp tinydivsi
|
||||
.endfn div10000int64,globl
|
|
@ -1,34 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Divides 64-bit signed integer by 100.
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @return rax has quotient
|
||||
div100int64:
|
||||
mov %rdi,%rax
|
||||
movabs $-6640827866535438581,%rdx
|
||||
imul %rdx
|
||||
lea (%rdx,%rdi),%rax
|
||||
sar $63,%rdi
|
||||
sar $6,%rax
|
||||
sub %rdi,%rax
|
||||
ret
|
||||
.endfn div100int64,globl
|
|
@ -1,29 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Divides 64-bit signed integer by 10.
|
||||
//
|
||||
// @param rdi is number to divide
|
||||
// @return quotient
|
||||
div10int64:
|
||||
mov $2,%cl
|
||||
movabs $0x6666666666666667,%rdx
|
||||
jmp tinydivsi
|
||||
.endfn div10int64,globl
|
|
@ -1 +0,0 @@
|
|||
These files aren't intended to be compiled.
|
|
@ -53,7 +53,19 @@ kCpuids:.long 0,0,0,0 # EAX=0 (Basic Processor Info)
|
|||
mov %rdi,%r8
|
||||
xor %eax,%eax
|
||||
1: xor %ecx,%ecx
|
||||
#ifdef FEATURELESS
|
||||
// It's been reported that GDB reverse debugging doesn't
|
||||
// understand VEX encoding. The workaround is to put:
|
||||
//
|
||||
// CPPFLAGS = -DFEATURELESS
|
||||
//
|
||||
// Inside your ~/.cosmo.mk file.
|
||||
xor %eax,%eax
|
||||
xor %ebx,%ebx
|
||||
xor %edx,%edx
|
||||
#else
|
||||
cpuid
|
||||
#endif
|
||||
stosl
|
||||
xchg %eax,%ebx
|
||||
stosl
|
||||
|
|
|
@ -1,48 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Creates bit mask of which bytes are the same.
|
||||
//
|
||||
// @param %rdi points to bit mask (write-only)
|
||||
// @param %rsi points to first buffer (read-only)
|
||||
// @param %rdx points to second buffer (read-only)
|
||||
// @param %rcx is byte length of both %rsi and %rdx
|
||||
// @return %rax is set to %rdi
|
||||
// @note buffers should be 128-byte aligned
|
||||
memeqmask:
|
||||
.leafprologue
|
||||
xor %eax,%eax
|
||||
test %ecx,%ecx
|
||||
jz 1f
|
||||
shr $3,%ecx
|
||||
0: movdqa (%rsi,%rax,8),%xmm0
|
||||
movdqa 16(%rsi,%rax,8),%xmm1
|
||||
pcmpeqb (%rdx,%rax,8),%xmm0
|
||||
pcmpeqb 16(%rdx,%rax,8),%xmm1
|
||||
pmovmskb %xmm0,%r8d
|
||||
pmovmskb %xmm1,%r9d
|
||||
mov %r8w,(%rdi,%rax)
|
||||
mov %r9w,2(%rdi,%rax)
|
||||
add $4,%eax
|
||||
cmp %ecx,%eax
|
||||
jb 0b
|
||||
1: mov %rdi,%rax
|
||||
.leafepilogue
|
||||
.endfn memeqmask,globl
|
|
@ -1,62 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Searches for last instance of uint16_t in memory region.
|
||||
//
|
||||
// @param rdi points to data to search
|
||||
// @param esi is treated as uint16_t
|
||||
// @param rdx is short count in rdi
|
||||
// @return rax is address of last %si in %rdi, or NULL
|
||||
// @note AVX2 requires Haswell (2014+) or Excavator (2015+)
|
||||
memrchr16:
|
||||
.leafprologue
|
||||
.profilable
|
||||
#if !IsTiny()
|
||||
cmp $16,%rdx
|
||||
jb 5f
|
||||
testb X86_HAVE(AVX2)+kCpuids(%rip)
|
||||
jz 5f
|
||||
vmovd %esi,%xmm0
|
||||
vpbroadcastw %xmm0,%ymm0
|
||||
3: vmovdqu -32(%rdi,%rdx,2),%ymm1
|
||||
vpcmpeqw %ymm1,%ymm0,%ymm1
|
||||
vpmovmskb %ymm1,%eax
|
||||
lzcnt %eax,%eax
|
||||
shr %eax
|
||||
mov %eax,%ecx
|
||||
sub %rcx,%rdx
|
||||
cmp $16,%eax
|
||||
jne 5f
|
||||
cmp $15,%rdx
|
||||
ja 3b
|
||||
vzeroupper
|
||||
#endif
|
||||
5: xor %eax,%eax
|
||||
mov %rdx,%rcx
|
||||
6: sub $1,%rcx
|
||||
jb 9f
|
||||
cmp %si,-2(%rdi,%rdx,2)
|
||||
mov %rcx,%rdx
|
||||
jne 6b
|
||||
lea (%rdi,%rcx,2),%rax
|
||||
9: .leafepilogue
|
||||
.endfn memrchr16,globl
|
|
@ -1,62 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Searches for last instance of wchar_t in memory region.
|
||||
//
|
||||
// @param rdi points to data to search
|
||||
// @param esi is treated as int32_t (officially wchar_t)
|
||||
// @param rdx is short count in rdi
|
||||
// @return rax is address of last %esi in %rdi, or NULL
|
||||
// @note AVX2 requires Haswell (2014+) or Excavator (2015+)
|
||||
wmemrchr:
|
||||
.leafprologue
|
||||
.profilable
|
||||
#if !IsTiny()
|
||||
cmp $8,%rdx
|
||||
jb 5f
|
||||
testb X86_HAVE(AVX2)+kCpuids(%rip)
|
||||
jz 5f
|
||||
vmovd %esi,%xmm0
|
||||
vpbroadcastd %xmm0,%ymm0
|
||||
3: vmovdqu -32(%rdi,%rdx,4),%ymm1
|
||||
vpcmpeqd %ymm1,%ymm0,%ymm1
|
||||
vpmovmskb %ymm1,%eax
|
||||
lzcnt %eax,%eax
|
||||
shr $2,%eax
|
||||
mov %eax,%ecx
|
||||
sub %rcx,%rdx
|
||||
cmp $8,%eax
|
||||
jne 5f
|
||||
cmp $7,%rdx
|
||||
ja 3b
|
||||
vzeroupper
|
||||
#endif
|
||||
5: xor %eax,%eax
|
||||
mov %rdx,%rcx
|
||||
6: sub $1,%rcx
|
||||
jb 9f
|
||||
cmp %esi,-4(%rdi,%rdx,4)
|
||||
mov %rcx,%rdx
|
||||
jne 6b
|
||||
lea (%rdi,%rcx,4),%rax
|
||||
9: .leafepilogue
|
||||
.endfn wmemrchr,globl
|
|
@ -10,23 +10,6 @@ void imapxlatab(void *);
|
|||
void insertionsort(int32_t *, size_t);
|
||||
void CheckStackIsAligned(void);
|
||||
|
||||
int64_t div10int64(int64_t) libcesque pureconst;
|
||||
int64_t div100int64(int64_t) libcesque pureconst;
|
||||
int64_t div1000int64(int64_t) libcesque pureconst;
|
||||
int64_t div10000int64(int64_t) libcesque pureconst;
|
||||
int64_t div1000000int64(int64_t) libcesque pureconst;
|
||||
int64_t div1000000000int64(int64_t) libcesque pureconst;
|
||||
|
||||
int64_t rem10int64(int64_t) libcesque pureconst;
|
||||
int64_t rem100int64(int64_t) libcesque pureconst;
|
||||
int64_t rem1000int64(int64_t) libcesque pureconst;
|
||||
int64_t rem10000int64(int64_t) libcesque pureconst;
|
||||
int64_t rem1000000int64(int64_t) libcesque pureconst;
|
||||
int64_t rem1000000000int64(int64_t) libcesque pureconst;
|
||||
|
||||
char sbb(uint64_t *, const uint64_t *, const uint64_t *, size_t);
|
||||
char adc(uint64_t *, const uint64_t *, const uint64_t *, size_t);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_NEXGEN32E_NEXGEN32E_H_ */
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Returns 𝑥 % 1,000,000,000.
//
// No divide instruction is used: a truncated quotient is formed with
// a multiply by a fixed-point reciprocal followed by a shift, and the
// remainder is then 𝑥 - quotient * 1,000,000,000.
|
||||
//
|
||||
// @param rdi int64 𝑥
|
||||
// @return rax has remainder
|
||||
rem1000000000int64:
|
||||
movabs $0x112e0be826d694b3,%rdx // reciprocal constant; appears to be ceil(2^90 / 10^9)
|
||||
mov %rdi,%rax
|
||||
imul %rdx // rdx:rax = 𝑥 * constant (signed); high 64 bits land in rdx
|
||||
mov %rdx,%rax
|
||||
sar $0x1a,%rax // high half >> 26 (total shift 64 + 26 = 90)
|
||||
mov %rdi,%rdx
|
||||
sar $0x3f,%rdx // rdx = -1 if 𝑥 is negative, else 0
|
||||
sub %rdx,%rax // +1 for negative 𝑥 so the quotient rounds toward zero
|
||||
imul $0x3b9aca00,%rax,%rax // quotient * 1,000,000,000
|
||||
sub %rax,%rdi // 𝑥 - quotient * 1,000,000,000
|
||||
mov %rdi,%rax // return the remainder in rax
|
||||
ret
|
||||
.endfn rem1000000000int64,globl
|
|
@ -1,38 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Returns 𝑥 % 1,000,000.
//
// No divide instruction is used: a truncated quotient is formed with
// a multiply by a fixed-point reciprocal followed by a shift, and the
// remainder is then 𝑥 - quotient * 1,000,000.
|
||||
//
|
||||
// @param rdi int64 𝑥
|
||||
// @return rax has remainder
|
||||
rem1000000int64:
|
||||
movabs $0x431bde82d7b634db,%rdx // reciprocal constant; appears to be ceil(2^82 / 10^6)
|
||||
mov %rdi,%rax
|
||||
imul %rdx // rdx:rax = 𝑥 * constant (signed); high 64 bits land in rdx
|
||||
mov %rdx,%rax
|
||||
sar $0x12,%rax // high half >> 18 (total shift 64 + 18 = 82)
|
||||
mov %rdi,%rdx
|
||||
sar $0x3f,%rdx // rdx = -1 if 𝑥 is negative, else 0
|
||||
sub %rdx,%rax // +1 for negative 𝑥 so the quotient rounds toward zero
|
||||
imul $0xf4240,%rax,%rax // quotient * 1,000,000
|
||||
sub %rax,%rdi // 𝑥 - quotient * 1,000,000
|
||||
mov %rdi,%rax // return the remainder in rax
|
||||
ret
|
||||
.endfn rem1000000int64,globl
|
|
@ -1,38 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Returns 𝑥 % 10,000.
//
// No divide instruction is used: a truncated quotient is formed with
// a multiply by a fixed-point reciprocal followed by a shift, and the
// remainder is then 𝑥 - quotient * 10,000.
|
||||
//
|
||||
// @param rdi int64 𝑥
|
||||
// @return rax has remainder
|
||||
rem10000int64:
|
||||
mov %rdi,%rax
|
||||
movabsq $0x346dc5d63886594b,%rdx // reciprocal constant; appears to be ceil(2^75 / 10^4)
|
||||
imulq %rdx // rdx:rax = 𝑥 * constant (signed); high 64 bits land in rdx
|
||||
mov %rdx,%rax
|
||||
mov %rdi,%rdx
|
||||
sar $11,%rax // high half >> 11 (total shift 64 + 11 = 75)
|
||||
sar $63,%rdx // rdx = -1 if 𝑥 is negative, else 0
|
||||
sub %rdx,%rax // +1 for negative 𝑥 so the quotient rounds toward zero
|
||||
imulq $10000,%rax,%rax // quotient * 10,000
|
||||
sub %rax,%rdi // 𝑥 - quotient * 10,000
|
||||
mov %rdi,%rax // return the remainder in rax
|
||||
ret
|
||||
.endfn rem10000int64,globl
|
|
@ -1,38 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Returns 𝑥 % 1,000.
//
// No divide instruction is used: a truncated quotient is formed with
// a multiply by a fixed-point reciprocal followed by a shift, and the
// remainder is then 𝑥 - quotient * 1,000.
|
||||
//
|
||||
// @param rdi int64 𝑥
|
||||
// @return rax has remainder
|
||||
rem1000int64:
|
||||
movabs $0x20c49ba5e353f7cf,%rdx // reciprocal constant; appears to be ceil(2^71 / 10^3)
|
||||
mov %rdi,%rax
|
||||
imul %rdx // rdx:rax = 𝑥 * constant (signed); high 64 bits land in rdx
|
||||
mov %rdx,%rax
|
||||
sar $0x7,%rax // high half >> 7 (total shift 64 + 7 = 71)
|
||||
mov %rdi,%rdx
|
||||
sar $0x3f,%rdx // rdx = -1 if 𝑥 is negative, else 0
|
||||
sub %rdx,%rax // +1 for negative 𝑥 so the quotient rounds toward zero
|
||||
imul $0x3e8,%rax,%rax // quotient * 1,000
|
||||
sub %rax,%rdi // 𝑥 - quotient * 1,000
|
||||
mov %rdi,%rax // return the remainder in rax
|
||||
ret
|
||||
.endfn rem1000int64,globl
|
|
@ -1,38 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Returns 𝑥 % 100.
//
// No divide instruction is used: a truncated quotient is formed with
// a multiply by a fixed-point reciprocal followed by a shift, and the
// remainder is then 𝑥 - quotient * 100. Unlike its siblings, the
// multiplier here is negative when read as a signed 64-bit value, so
// 𝑥 is added back to the high half of the product before shifting.
|
||||
//
|
||||
// @param rdi int64 𝑥
|
||||
// @return rax has remainder
|
||||
rem100int64:
|
||||
mov %rdi,%rax
|
||||
movabsq $-6640827866535438581,%rdx // reciprocal constant; as unsigned it appears to be ceil(2^70 / 100)
|
||||
imul %rdx // rdx:rax = 𝑥 * constant (signed); high 64 bits land in rdx
|
||||
lea (%rdx,%rdi),%rax // high half + 𝑥 compensates for the constant wrapping negative
|
||||
mov %rdi,%rdx
|
||||
sar $6,%rax // corrected high half >> 6 (total shift 64 + 6 = 70)
|
||||
sar $63,%rdx // rdx = -1 if 𝑥 is negative, else 0
|
||||
sub %rdx,%rax // +1 for negative 𝑥 so the quotient rounds toward zero
|
||||
imul $100,%rax,%rax // quotient * 100
|
||||
sub %rax,%rdi // 𝑥 - quotient * 100
|
||||
mov %rdi,%rax // return the remainder in rax
|
||||
ret
|
||||
.endfn rem100int64,globl
|
|
@ -1,39 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Returns 𝑥 % 10.
//
// No divide instruction is used: a truncated quotient is formed with
// a multiply by a fixed-point reciprocal followed by a shift, and the
// remainder is then 𝑥 - quotient * 10.
|
||||
//
|
||||
// @param rdi int64 𝑥
|
||||
// @return rax has remainder
|
||||
rem10int64:
|
||||
movabs $0x6666666666666667,%rdx // reciprocal constant; appears to be ceil(2^66 / 10)
|
||||
mov %rdi,%rax
|
||||
imul %rdx // rdx:rax = 𝑥 * constant (signed); high 64 bits land in rdx
|
||||
mov %rdx,%rax
|
||||
sar $0x2,%rax // high half >> 2 (total shift 64 + 2 = 66)
|
||||
mov %rdi,%rdx
|
||||
sar $0x3f,%rdx // rdx = -1 if 𝑥 is negative, else 0
|
||||
sub %rdx,%rax // +1 for negative 𝑥 so the quotient rounds toward zero
|
||||
lea (%rax,%rax,4),%rax // quotient * 5
|
||||
add %rax,%rax // ... * 2 = quotient * 10
|
||||
sub %rax,%rdi // 𝑥 - quotient * 10
|
||||
mov %rdi,%rax // return the remainder in rax
|
||||
ret
|
||||
.endfn rem10int64,globl
|
|
@ -1,83 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/notice.inc"
|
||||
|
||||
// Applies no-clobber guarantee to System Five function call.
|
||||
//
|
||||
// - Reentrant
|
||||
// - Realigns stack
|
||||
// - Doesn't assume red zone
|
||||
// - Clobbers nothing (except %rax and flags)
|
||||
//
|
||||
// This function may be called using an stdcall convention. It's
|
||||
// useful for files named FOO.hookabi.c and BAR.ncabi.c to make
|
||||
// calls into other parts of the system, that don't conform to the
|
||||
// same restricted ABI.
|
||||
//
|
||||
// The caller pushes arg6 first and the call address last, so that
|
||||
// on entry the seven words sit directly above the return address.
|
||||
// This routine pops all seven of them before returning.
|
||||
//
|
||||
// NOTE(review): only the integer registers %rdi, %rsi, %rdx, %rcx,
|
||||
// %r8-%r11 are saved and restored below; nothing here preserves
|
||||
// vector or x87 state, so "clobbers nothing" holds for those only
|
||||
// if the callee leaves them alone -- confirm against callers.
|
||||
//
|
||||
// @param six args and fn addr pushed on stack in reverse order
|
||||
// @return %rax has function return value, and stack is cleaned up
|
||||
// @see libc/shadowargs.hook.c for intended use case
|
||||
slowcall:
|
||||
#param %r9 # 0x40 arg6
|
||||
#param %r8 # 0x38 arg5
|
||||
#param %rcx # 0x30 arg4
|
||||
#param %rdx # 0x28 arg3
|
||||
#param %rsi # 0x20 arg2
|
||||
#param %rdi # 0x18 arg1
|
||||
#param %rax # 0x10 call address
|
||||
#param # 0x08 return address
|
||||
push %rbp # 0x00 parent frame
|
||||
mov %rsp,%rbp # ----
|
||||
push %rdi #-0x08
|
||||
push %rsi #-0x10
|
||||
push %rdx #-0x18
|
||||
push %rcx #-0x20
|
||||
push %r8 #-0x28
|
||||
push %r9 #-0x30
|
||||
push %r10 #-0x38
|
||||
push %r11 #-0x40
|
||||
mov 0x10(%rbp),%rax // load call address from the stack
|
||||
mov 0x18(%rbp),%rdi // load arg1..arg6 into the System V argument registers
|
||||
mov 0x20(%rbp),%rsi
|
||||
mov 0x28(%rbp),%rdx
|
||||
mov 0x30(%rbp),%rcx
|
||||
mov 0x38(%rbp),%r8
|
||||
mov 0x40(%rbp),%r9
|
||||
and $-16,%rsp // round the stack pointer down to a 16-byte boundary for the call
|
||||
call *%rax
|
||||
push %rax // stash the return value while %rax is used as scratch
|
||||
mov 0x00(%rbp),%rax // relocate the saved parent %rbp ...
|
||||
mov %rax,0x38(%rbp) // ... into the arg5 slot
|
||||
mov 0x08(%rbp),%rax // relocate the return address ...
|
||||
mov %rax,0x40(%rbp) // ... into the arg6 slot (the topmost pushed word)
|
||||
pop %rax // recover the return value
|
||||
lea -0x40(%rbp),%rsp // point %rsp at the saved-register area (undoes the realignment)
|
||||
pop %r11 // restore registers in reverse order of the pushes above
|
||||
pop %r10
|
||||
pop %r9
|
||||
pop %r8
|
||||
pop %rcx
|
||||
pop %rdx
|
||||
pop %rsi
|
||||
pop %rdi
|
||||
lea 0x38(%rbp),%rsp // skip to the relocated frame pointer / return address pair
|
||||
pop %rbp
|
||||
ret // returns with all seven pushed words removed from the stack
|
||||
.endfn slowcall,globl
|
|
@ -1,24 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_NEXGEN32E_SLOWCALL_H_
|
||||
#define COSMOPOLITAN_LIBC_NEXGEN32E_SLOWCALL_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/**
 * Calls `fn` through the `slowcall` assembly routine.
 *
 * The six arguments are pushed in reverse order (arg6 first), then
 * `fn` is pushed last, and `slowcall` is invoked; the value it leaves
 * in %rax becomes the value of this statement expression.
 *
 * NOTE(review): the inputs use the "g" constraint, which permits
 * memory operands. If the compiler picks an %rsp-relative address for
 * one of them, the earlier pushes in this same asm statement would
 * shift what that address refers to. The asm also writes below %rsp
 * and changes flags without declaring either. Confirm that callers
 * build without a red zone and that operands cannot be %rsp-relative.
 *
 * @param fn is address of function to call
 * @param arg1..arg6 are word-sized arguments forwarded to `fn`
 * @return `void *` holding whatever `fn` left in %rax
 */
#define slowcall(fn, arg1, arg2, arg3, arg4, arg5, arg6) \
|
||||
({ \
|
||||
void *ax; \
|
||||
asm volatile("push\t%7\n\t" \
|
||||
"push\t%6\n\t" \
|
||||
"push\t%5\n\t" \
|
||||
"push\t%4\n\t" \
|
||||
"push\t%3\n\t" \
|
||||
"push\t%2\n\t" \
|
||||
"push\t%1\n\t" \
|
||||
"call\tslowcall" \
|
||||
: "=a"(ax) \
|
||||
: "g"(fn), "g"(arg1), "g"(arg2), "g"(arg3), "g"(arg4), \
|
||||
"g"(arg5), "g"(arg6) \
|
||||
: "memory"); \
|
||||
ax; \
|
||||
})
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_NEXGEN32E_SLOWCALL_H_ */
|
|
@ -1,33 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Copies NUL-terminated byte string to 16-bit string, zero-extending.
//
// Each source byte is loaded into AL and stored as the 16-bit AX,
// whose upper byte stays zero; the terminator is copied too, as a
// 16-bit zero. The string instructions used here step in the
// direction given by the direction flag, which this code does not
// set (presumably it is clear on entry -- confirm).
//
// @param rdi is destination buffer of 16-bit units
// @param rsi is NUL-terminated source byte string
// @return rax is the original rdi
// TODO(jart): pmovzxbw and vpunpcklbw
|
||||
strcpyzbw:
|
||||
.leafprologue
|
||||
.profilable
|
||||
push %rdi // remember destination start for the return value
|
||||
xor %eax,%eax // clear AH so each stored word is a zero-extended byte
|
||||
1: lodsb // al = *rsi, then step rsi by one byte
|
||||
stosw // *(uint16 *)rdi = ax, then step rdi by two bytes
|
||||
test %al,%al
|
||||
jnz 1b // loop until the NUL has been copied
|
||||
pop %rax // return the destination start
|
||||
.leafepilogue
|
||||
.endfn strcpyzbw,globl
|
|
@ -1,406 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/nexgen32e/macros.h"
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Returns length of NUL-terminated string w/ security blankets.
|
||||
//
|
||||
// This is like strnlen() except it'll return 0 if (1) RDI is NULL
|
||||
// or (2) a NUL-terminator wasn't found in the first RSI bytes.
|
||||
//
|
||||
// This entry point only sets up registers and then falls through
|
||||
// into strsak, which follows it and performs the actual scan.
|
||||
//
|
||||
// @param rdi is a nullable NUL-terminated string pointer
|
||||
// @param rsi is the maximum number of bytes to consider
|
||||
// @return rax is the number of bytes, excluding the NUL
|
||||
strnlen_s:
|
||||
.leafprologue
|
||||
.profilable
|
||||
xor %eax,%eax // result is 0 if we bail out on NULL below
|
||||
xor %r10d,%r10d // r10 = 0: strsak masks the result to 0 when rsi bytes run out
|
||||
test %rdi,%rdi
|
||||
jnz 0f // non-NULL pointer: go scan
|
||||
.leafepilogue
|
||||
0: xor %edx,%edx // dl = dh = 0: both search characters are NUL
|
||||
mov %rdi,%r8 // r8 = start pointer, so strsak yields a length, not a pointer
|
||||
// r9 is left unset: strsak tests dl before dh, and with dl == dh the
// dh path (the only match path that applies r9) is never taken.
// 𝑠𝑙𝑖𝑑𝑒
|
||||
.endfn strnlen_s,globl
|
||||
|
||||
// Swiss army knife of string character scanning.
|
||||
// Used to be fourteen fast functions in one.
|
||||
//
|
||||
// Scans forward from rdi for the first byte equal to DL or DH,
|
||||
// giving up once rsi bytes have been examined. Bytes are checked
|
||||
// one at a time until the cursor is 32-byte aligned, after which
|
||||
// whole 32-byte chunks are tested with AVX2 (or two SSE2 halves);
|
||||
// the vector loop only locates a chunk offset and the byte loop
|
||||
// then decides which of the two characters was hit.
|
||||
//
|
||||
// There is no prologue here although .leafepilogue is used on exit;
|
||||
// strnlen_s above falls through into this label after running its
|
||||
// own .leafprologue (presumably other callers do likewise).
|
||||
//
|
||||
// @param rdi is non-null string memory
|
||||
// @param rsi is max number of bytes to consider
|
||||
// @param dl is search character #1
|
||||
// @param dh is search character #2
|
||||
// @param r8 is subtracted from result (for length vs. pointer)
|
||||
// @param r9 masks result if DH is found (for NUL vs. NULL)
|
||||
// @param r10 masks result on bytes exhausted (for length vs. NULL)
|
||||
// @return rax end pointer after r8/r9/r10 modifications
|
||||
strsak: lea -1(%rdi),%rax // rax is the cursor, biased by -1 for the increment below
|
||||
1: add $1,%rax // advance to the next byte
|
||||
sub $1,%rsi // spend one byte of budget
|
||||
jb .Lend // borrow: budget exhausted
|
||||
test $31,%al // is the cursor 32-byte aligned?
|
||||
jz .Lfast // yes: try the vector path
|
||||
.Lbyte: mov (%rax),%cl // examine a single byte
|
||||
cmp %cl,%dl
|
||||
je .Ldone // character #1 found (checked before #2)
|
||||
cmp %cl,%dh
|
||||
je .Lnul // character #2 found
|
||||
jmp 1b
|
||||
.Ldone: sub %r8,%rax // DL hit: result is cursor - r8, unmasked
|
||||
jmp .Lret
|
||||
.Lend: mov %r10,%r9 // budget exhausted: apply r10 as the mask instead of r9
|
||||
.Lnul: sub %r8,%rax // DH hit (or fallthrough from .Lend): cursor - r8 ...
|
||||
and %r9,%rax // ... masked by r9
|
||||
.Lret: .leafepilogue
|
||||
.Lslow: add $32,%rsi // fewer than 32 bytes of budget left: undo the chunk charge
|
||||
jmp .Lbyte // and resume byte-at-a-time at the current cursor
|
||||
.Lfast: movzbl %dl,%ecx
|
||||
movd %ecx,%xmm0 // xmm0 low byte = character #1
|
||||
movzbl %dh,%ecx
|
||||
movd %ecx,%xmm1 // xmm1 low byte = character #2
|
||||
sub $32,%rax // pre-bias: each loop iteration starts by adding 32
|
||||
#if !X86_NEED(AVX2)
|
||||
testb X86_HAVE(AVX2)+kCpuids(%rip)
|
||||
jz .Lsse2 // presumably taken when the CPU lacks AVX2 -- macro not visible here
|
||||
#endif
|
||||
vpbroadcastb %xmm0,%ymm0 // replicate each search character across 32 lanes
|
||||
vpbroadcastb %xmm1,%ymm1
|
||||
1: add $32,%rax // next 32-byte chunk
|
||||
sub $32,%rsi // charge 32 bytes of budget
|
||||
// NOTE(review): the exit through .Lslow below skips the vzeroupper
// executed on the match path -- confirm whether that is intended.
9: jb .Lslow // not enough budget for a whole chunk
|
||||
vmovdqa (%rax),%ymm2 // aligned 32-byte load
|
||||
vpcmpeqb %ymm0,%ymm2,%ymm3 // lanes equal to character #1
|
||||
vpcmpeqb %ymm1,%ymm2,%ymm2 // lanes equal to character #2
|
||||
vpor %ymm3,%ymm2,%ymm2 // either one
|
||||
vpmovmskb %ymm2,%ecx // one bit per byte lane
|
||||
bsf %ecx,%ecx // index of first hit; ZF is set when the mask was zero
|
||||
je 1b // no hit in this chunk: keep going
|
||||
vzeroupper
|
||||
2: add %rcx,%rax // move cursor to the hit ...
|
||||
jmp .Lbyte // ... and let the byte loop classify it
|
||||
#if !X86_NEED(AVX2)
|
||||
.Lsse2: pbroadcastb %xmm0 // macro; presumably replicates the low byte across xmm0
|
||||
pbroadcastb %xmm1
|
||||
1: add $32,%rax // next 32-byte chunk
|
||||
sub $32,%rsi // charge 32 bytes of budget
|
||||
jb 9b // not enough budget: 9 above forwards to .Lslow
|
||||
movdqa (%rax),%xmm2 // low 16 bytes of the chunk
|
||||
movdqa 16(%rax),%xmm3 // high 16 bytes of the chunk
|
||||
movdqa %xmm3,%xmm4
|
||||
pcmpeqb %xmm0,%xmm3 // high half: lanes equal to character #1
|
||||
pcmpeqb %xmm1,%xmm4 // high half: lanes equal to character #2
|
||||
por %xmm4,%xmm3
|
||||
pmovmskb %xmm3,%ecx
|
||||
shl $16,%ecx // high-half hits occupy mask bits 16..31
|
||||
movdqa %xmm2,%xmm4
|
||||
pcmpeqb %xmm0,%xmm2 // low half: lanes equal to character #1
|
||||
pcmpeqb %xmm1,%xmm4 // low half: lanes equal to character #2
|
||||
por %xmm4,%xmm2
|
||||
pmovmskb %xmm2,%r11d
|
||||
or %r11d,%ecx // combined 32-bit hit mask
|
||||
bsf %ecx,%ecx // index of first hit; ZF is set when the mask was zero
|
||||
je 1b // no hit in this chunk: keep going
|
||||
jmp 2b // hit: share the cursor fixup with the AVX2 path
|
||||
#endif
|
||||
.endfn strsak,globl,hidden
|
||||
|
||||
/* benchmarked on intel core i7-6700 @ 3.40GHz (skylake)
|
||||
includes function call overhead (unless marked otherwise)
|
||||
|
||||
your strlen, &c (strsak+avx2) for #c per n where c ≈ 0.293ns
|
||||
N x1 x8 x64 mBps
|
||||
------------------------------------------------------------
|
||||
1 47.000 36.375 35.141 99
|
||||
1 35.000 34.625 36.234 96
|
||||
2 31.500 18.812 18.992 184
|
||||
3 19.667 13.042 13.182 265
|
||||
4 30.750 10.281 10.285 339
|
||||
7 15.857 8.946 7.551 462
|
||||
8 12.125 9.203 7.119 490
|
||||
15 10.467 5.475 4.601 758
|
||||
16 6.812 5.523 4.798 727
|
||||
31 5.387 4.327 3.517 992
|
||||
32 4.719 1.645 1.532 2278
|
||||
63 5.000 2.403 2.034 1715
|
||||
64 2.047 0.779 0.788 4427
|
||||
127 2.134 1.194 1.027 3399
|
||||
128 1.742 0.444 0.419 8327
|
||||
255 0.945 0.594 0.554 6295
|
||||
256 0.574 0.271 0.264 13226
|
||||
511 0.785 0.362 0.307 11384
|
||||
512 0.326 0.178 0.151 23134
|
||||
1023 0.288 0.242 0.185 18862
|
||||
1024 0.208 0.114 0.107 32565
|
||||
2047 0.235 0.127 0.123 28430
|
||||
2048 0.127 0.090 0.084 41413
|
||||
4095 0.119 0.106 0.099 35116
|
||||
4096 0.100 0.081 0.079 44372
|
||||
8191 0.092 0.082 0.081 43176
|
||||
8192 0.081 0.072 0.071 49419
|
||||
16383 0.076 0.072 0.071 48847
|
||||
16384 0.071 0.068 0.067 52381
|
||||
32767 0.072 0.069 0.068 51154
|
||||
32768 0.068 0.066 0.065 53409
|
||||
|
||||
your tinystrlen()
|
||||
N x1 x8 x64 mBps
|
||||
------------------------------------------------------------
|
||||
1 53.000 33.625 33.672 97
|
||||
1 33.000 32.125 32.234 101
|
||||
2 24.500 19.438 17.711 184
|
||||
3 23.667 12.875 11.911 273
|
||||
4 13.750 9.281 9.238 352
|
||||
7 11.000 6.125 5.801 560
|
||||
8 7.625 5.609 5.232 621
|
||||
15 11.800 3.825 3.364 966
|
||||
16 4.562 3.648 3.173 1024 « optimal
|
||||
31 3.710 2.851 2.298 1414
|
||||
32 3.031 2.254 2.159 1506 « dropoff
|
||||
63 2.683 1.827 1.691 1922
|
||||
64 2.078 1.932 1.689 1924
|
||||
127 1.630 1.647 1.622 2004
|
||||
128 1.727 1.671 1.652 1968
|
||||
255 1.392 1.450 1.435 2265
|
||||
256 1.473 1.427 1.437 2262
|
||||
511 1.325 1.353 1.337 2431
|
||||
512 1.408 1.343 1.337 2431
|
||||
1023 1.289 1.281 1.287 2525
|
||||
1024 1.269 1.295 1.297 2506
|
||||
2047 1.269 1.274 1.269 2561
|
||||
2048 1.280 1.263 1.281 2538
|
||||
4095 1.262 1.270 1.266 2568
|
||||
4096 1.270 1.264 1.265 2570
|
||||
8191 1.253 1.254 1.254 2592
|
||||
8192 1.219 1.224 1.225 2653
|
||||
16383 1.225 1.222 1.220 2663
|
||||
16384 1.226 1.221 1.222 2659
|
||||
32767 1.227 1.224 1.223 2658
|
||||
32768 1.220 1.221 1.222 2659
|
||||
|
||||
glibc strlen for #c per n where c ≈ 0.273ns
|
||||
N x1 x8 x64 mBps
|
||||
------------------------------------------------------------
|
||||
1 3497.000 53.125 42.641 82
|
||||
1 69.000 44.875 42.547 82
|
||||
2 45.500 24.188 21.852 160
|
||||
3 23.000 15.625 14.557 240
|
||||
4 22.250 11.406 10.637 328
|
||||
7 10.143 6.768 6.230 560
|
||||
8 11.125 5.797 5.486 636
|
||||
15 5.800 3.142 2.859 1220
|
||||
16 7.062 3.070 2.737 1275
|
||||
31 2.806 1.585 1.407 2481
|
||||
32 3.156 1.574 1.349 2587
|
||||
63 2.016 0.895 0.691 5049
|
||||
64 1.328 0.744 0.670 5207
|
||||
127 1.441 0.521 0.407 8577
|
||||
128 0.648 0.454 0.405 8619
|
||||
255 0.553 0.286 0.214 16277
|
||||
256 0.387 0.235 0.218 15984
|
||||
511 0.456 0.151 0.129 27077
|
||||
512 0.182 0.134 0.129 27117
|
||||
1023 0.171 0.106 0.082 42795
|
||||
1024 0.112 0.088 0.082 42741
|
||||
2047 0.099 0.069 0.059 59537
|
||||
2048 0.072 0.060 0.058 59925
|
||||
4095 0.065 0.053 0.047 74122
|
||||
4096 0.061 0.048 0.047 74478
|
||||
8191 0.048 0.045 0.044 79117
|
||||
8192 0.051 0.045 0.044 79181
|
||||
16383 0.042 0.040 0.061 57018
|
||||
16384 0.069 0.063 0.061 57245
|
||||
32767 0.081 0.073 0.068 51426
|
||||
32768 0.084 0.072 0.068 51285
|
||||
|
||||
GCC strlen (-Os REPNZ SCASB) for #c per n where c ≈ 0.293ns
|
||||
N x1 x8 x64 mBps
|
||||
------------------------------------------------------------
|
||||
1 103.000 84.125 88.766 37
|
||||
1 81.000 85.125 87.328 37
|
||||
2 43.500 44.562 45.508 71
|
||||
3 33.000 30.208 30.995 105
|
||||
4 24.750 23.156 23.113 141
|
||||
7 17.000 13.054 15.355 212
|
||||
8 13.375 14.047 13.982 232
|
||||
15 9.533 9.258 55.111 59
|
||||
16 6.312 6.352 6.364 511
|
||||
31 4.032 4.141 4.141 785
|
||||
32 3.969 4.059 4.048 803
|
||||
63 2.937 2.970 2.995 1086
|
||||
64 2.922 2.939 2.956 1100
|
||||
127 2.386 2.408 2.403 1353
|
||||
128 2.383 2.403 2.401 1354
|
||||
255 2.129 2.118 2.124 1530
|
||||
256 2.137 2.133 2.130 1526
|
||||
511 1.982 1.986 3.351 970
|
||||
512 1.982 1.990 1.986 1637
|
||||
1023 1.915 1.916 2.587 1257
|
||||
1024 1.868 1.867 1.866 1742
|
||||
2047 1.835 1.833 1.832 1775
|
||||
2048 1.830 1.831 1.832 1775
|
||||
4095 1.814 1.814 1.815 1791
|
||||
4096 1.810 1.815 1.815 1791
|
||||
8191 1.805 1.807 1.806 1800
|
||||
8192 1.805 1.806 1.806 1800
|
||||
16383 1.803 1.756 1.756 1851
|
||||
16384 1.758 1.756 1.756 1851
|
||||
32767 1.756 1.754 1.754 1853
|
||||
32768 1.756 1.754 1.754 1853
|
||||
|
||||
Intel Optimz. Manual (SSE4.2) for #c per n where c ≈ 0.273ns
|
||||
N x1 x8 x64 mBps
|
||||
------------------------------------------------------------
|
||||
1 37.000 43.125 34.078 102
|
||||
1 33.000 33.875 34.016 103
|
||||
2 39.500 17.188 17.555 199
|
||||
3 18.333 12.208 12.036 290
|
||||
4 30.250 9.344 9.137 382
|
||||
7 14.429 5.732 5.766 605
|
||||
8 7.875 6.797 5.354 652
|
||||
15 10.733 5.825 3.516 993
|
||||
16 3.812 2.383 2.325 1501
|
||||
31 4.097 2.609 2.079 1678
|
||||
32 3.031 1.395 1.349 2587
|
||||
63 2.937 1.558 1.079 3235
|
||||
64 2.016 0.893 0.690 5056
|
||||
127 1.929 0.721 0.607 5745
|
||||
128 0.617 0.483 0.428 8147
|
||||
255 1.275 0.404 0.411 8486
|
||||
256 0.480 0.319 0.299 11681
|
||||
511 0.479 0.307 0.288 12127
|
||||
512 0.322 0.244 0.232 15013
|
||||
1023 0.324 0.224 0.225 15512
|
||||
1024 0.245 0.240 0.223 15651
|
||||
2047 0.222 0.213 0.206 16938
|
||||
2048 0.204 0.194 0.192 18140
|
||||
4095 0.204 0.188 0.185 18888
|
||||
4096 0.183 0.179 0.179 19446
|
||||
8191 0.179 0.176 0.174 20000
|
||||
8192 0.174 0.172 0.171 20383
|
||||
16383 0.171 0.170 0.169 20604
|
||||
16384 0.169 0.169 0.168 20808
|
||||
32767 0.213 0.225 0.267 13064
|
||||
32768 0.231 0.215 0.220 15852
|
||||
|
||||
musl libc strlen for #c per n where c ≈ 0.273ns
|
||||
N x1 x8 x64 mBps
|
||||
------------------------------------------------------------
|
||||
1 65.000 36.125 37.984 92
|
||||
1 39.000 37.625 37.422 93
|
||||
2 41.500 21.938 20.695 169
|
||||
3 22.333 17.625 15.859 220
|
||||
4 21.250 13.656 12.105 288
|
||||
7 22.143 9.018 7.609 459
|
||||
8 31.125 7.234 7.346 475
|
||||
15 11.267 5.025 4.709 741
|
||||
16 9.438 4.039 3.849 907
|
||||
31 4.871 3.133 2.488 1402
|
||||
32 5.219 2.246 2.039 1712
|
||||
63 4.302 1.462 1.407 2479
|
||||
64 2.109 1.428 1.155 3023
|
||||
127 1.551 1.078 0.879 3971
|
||||
128 1.742 0.903 0.760 4591
|
||||
255 0.922 0.558 0.605 5764
|
||||
256 0.934 0.575 0.537 6495
|
||||
511 0.550 0.493 0.455 7674
|
||||
512 0.646 0.490 0.426 8183
|
||||
1023 0.550 0.439 0.425 8203
|
||||
1024 0.472 0.421 0.408 8549
|
||||
2047 0.507 0.334 0.373 9360
|
||||
2048 0.403 0.426 0.409 8540
|
||||
4095 0.391 0.240 0.236 14799
|
||||
4096 0.238 0.222 0.221 15766
|
||||
8191 0.225 0.223 0.221 15779
|
||||
8192 0.225 0.214 0.215 16250
|
||||
16383 0.212 0.212 0.210 16595
|
||||
16384 0.209 0.210 0.211 16535
|
||||
32767 0.214 0.208 0.205 17001
|
||||
32768 0.207 0.207 0.291 12002
|
||||
|
||||
newlib strlen for #c per n where c ≈ 0.273ns
|
||||
N x1 x8 x64 mBps
|
||||
------------------------------------------------------------
|
||||
1 33.000 34.625 34.141 102
|
||||
1 33.000 34.125 33.984 103
|
||||
2 58.500 18.562 17.508 199
|
||||
3 16.333 12.792 12.016 290
|
||||
4 19.250 9.219 9.215 379
|
||||
7 17.571 6.089 5.685 614
|
||||
8 16.625 5.078 5.432 642
|
||||
15 8.467 4.042 3.207 1088
|
||||
16 3.938 2.773 2.733 1277
|
||||
31 3.645 1.673 1.598 2183
|
||||
32 3.281 1.527 1.493 2338
|
||||
63 2.619 1.042 0.895 3901
|
||||
64 1.422 0.928 0.813 4294
|
||||
127 0.984 0.718 0.561 6222
|
||||
128 1.195 0.591 0.532 6558
|
||||
255 0.600 0.404 0.397 8785
|
||||
256 0.621 0.429 0.376 9280
|
||||
511 0.346 0.311 0.306 11421
|
||||
512 0.420 0.308 0.296 11776
|
||||
1023 0.284 0.285 0.285 12237
|
||||
1024 0.321 0.282 0.280 12456
|
||||
2047 0.253 0.252 0.252 13864
|
||||
2048 0.260 0.249 0.249 14012
|
||||
4095 0.236 0.236 0.236 14811
|
||||
4096 0.239 0.235 0.234 14906
|
||||
8191 0.233 0.228 0.227 15371
|
||||
8192 0.230 0.227 0.227 15397
|
||||
16383 0.223 0.224 0.223 15638
|
||||
16384 0.223 0.224 0.223 15663
|
||||
32767 0.224 0.387 0.225 15527
|
||||
32768 0.223 0.222 0.222 15724
|
||||
|
||||
Agner Fog's strlen (SSE2) for #c per n where c ≈ 0.273ns
|
||||
N x1 x8 x64 mBps
|
||||
------------------------------------------------------------
|
||||
1 59.000 38.375 38.453 91
|
||||
1 37.000 38.625 38.234 91
|
||||
2 18.500 19.062 19.273 181
|
||||
3 13.000 12.792 12.859 271
|
||||
4 9.250 9.594 9.660 361
|
||||
7 5.286 5.554 5.502 634
|
||||
8 4.625 4.703 4.791 728
|
||||
15 2.600 2.858 2.622 1331
|
||||
16 2.438 2.414 2.421 1442
|
||||
31 2.161 1.399 1.290 2706
|
||||
32 1.219 1.262 1.250 2793
|
||||
63 1.508 0.875 0.693 5038
|
||||
64 0.641 0.654 0.655 5328
|
||||
127 1.205 0.406 0.379 9200
|
||||
128 0.367 0.372 0.369 9463
|
||||
255 0.467 0.310 0.235 14835
|
||||
256 0.230 0.232 0.232 15034
|
||||
511 0.272 0.181 0.159 21918
|
||||
512 0.174 0.161 0.158 22148
|
||||
1023 0.175 0.134 0.120 29043
|
||||
1024 0.140 0.122 0.120 29005
|
||||
2047 0.128 0.114 0.112 31205
|
||||
2048 0.130 0.113 0.112 31242
|
||||
4095 0.105 0.098 0.097 35984
|
||||
4096 0.105 0.098 0.097 35973
|
||||
8191 0.093 0.090 0.090 38953
|
||||
8192 0.094 0.090 0.090 38986
|
||||
16383 0.088 0.086 0.086 40648
|
||||
16384 0.088 0.086 0.086 40652
|
||||
32767 0.088 0.086 0.085 40956
|
||||
32768 0.087 0.085 0.085 41114 */
|
|
@ -1,41 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/pcmpstr.inc"
|
||||
#include "libc/nexgen32e/strstr.inc"
|
||||
|
||||
// Searches for substring using PCMPISTRI.
//
// This is the same loop as the .strstr macro in strstr.inc, minus
// that macro's handling of misaligned and page-edge haystack
// addresses. Register roles below are read off the code: rdi is the
// haystack, rsi is the needle (loaded with movaps, so it must be
// 16-byte aligned), and rax is the haystack cursor that is returned.
//
// TODO(jart): Fix me.
|
||||
strstr_sse42:
|
||||
.leafprologue
|
||||
mov %rdi,%rax // rax = haystack cursor / eventual result
|
||||
xor %ecx,%ecx // first pass advances the cursor by zero
|
||||
0: mov $-16,%rdx // restart at the first needle block
|
||||
1: add $16,%rdx // rdx = byte offset of the current 16-byte needle block
|
||||
movaps (%rsi,%rdx),%xmm0 // aligned load of that needle block
|
||||
2: add %rcx,%rax // advance cursor by the index pcmpistri reported
|
||||
lea (%rax,%rdx),%rdi // haystack address paired with this needle block
|
||||
pcmpistri $.Lequalordered,(%rdi),%xmm0
|
||||
3: ja 2b # !CF (no match) && !ZF (need NUL-term)
|
||||
jnc 4f # !CF (no match) && ZF (NUL-terminator)
|
||||
jno 0b # !OF ← CF && CX!=0 (matched at offset)
|
||||
jns 1b # !SF ← NUL ∉ XMM1 (need to match more)
|
||||
jmp 5f # youtu.be/nVk1DjMtLWs
|
||||
4: xor %eax,%eax // not found: return NULL
|
||||
5: .leafepilogue
|
||||
.endfn strstr_sse42,globl,hidden
|
|
@ -1,68 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 sw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
/* clang-format off */
|
||||
|
||||
// Searches for substring.
|
||||
//
|
||||
// @param rdi is NUL-terminated haystack string
|
||||
// @param rsi is NUL-terminated needle string (16-byte aligned)
|
||||
// @return rax is pointer to substring or NULL
|
||||
// @todo 10x faster than naïve but could be 100x faster
|
||||
// Expands to the full body of a substring-search routine, including its
// own frame setup and `ret`. The `mode` argument is passed straight
// through as the immediate control byte of every pcmpistri below.
.macro .strstr mode:req
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
.profilable
|
||||
// 32 bytes of scratch, written at label 8 through -32(%rbp) and -16(%rbp)
sub $32,%rsp
|
||||
mov %rdi,%rax
|
||||
xor %ecx,%ecx
|
||||
0: mov $-16,%rdx
|
||||
1: add $16,%rdx
|
||||
movaps (%rsi,%rdx),%xmm0
|
||||
2: add %rcx,%rax
|
||||
lea (%rax,%rdx),%rdi
|
||||
// a cursor that isn't 16-byte aligned takes the path at label 6
test $15,%edi
|
||||
jnz 6f
|
||||
pcmpistri $\mode,(%rdi),%xmm0
|
||||
3: ja 2b # !CF (no match) && !ZF (need NUL-term)
|
||||
jnc 4f # !CF (no match) && ZF (NUL-terminator)
|
||||
jno 0b # !OF ← CF && CX!=0 (matched at offset)
|
||||
jns 1b # !SF ← NUL ∉ XMM1 (need to match more)
|
||||
jmp 5f # youtu.be/nVk1DjMtLWs
|
||||
// not-found exit: return NULL
4: xor %eax,%eax
|
||||
5: leave
|
||||
ret
|
||||
// r9 = cursor & 15, the cursor's misalignment within its 16-byte line
6: mov %rdi,%r9 # same w/ pointer realign
|
||||
and $15,%r9d
|
||||
// r8 = low 12 bits of the cursor; above 0xff0 a 16-byte read starting
// here would run past the next 4096-byte boundary, so go to label 8
mov %edi,%r8d
|
||||
and $0xfff,%r8d
|
||||
cmp $0xff0,%r8d
|
||||
ja 8f
|
||||
7: pcmpistri $\mode,(%rdi),%xmm0
|
||||
// on the `ja` condition (CF and ZF both clear) step by r9 instead of
// the index pcmpistri produced
cmova %r9d,%ecx
|
||||
jmp 3b
|
||||
// xmm2 = all ones (a register compared equal with itself). The aligned
// 16 bytes holding the cursor are copied to the scratch area, followed
// by those sixteen 0xff bytes, and then probed at offset r9.
8: pcmpeqd %xmm2,%xmm2 # handle danger memory
|
||||
mov %rdi,%r8
|
||||
and $-16,%r8
|
||||
movaps (%r8),%xmm1
|
||||
movaps %xmm1,-32(%rbp)
|
||||
movaps %xmm2,-16(%rbp)
|
||||
pcmpistri $\mode,-32(%rbp,%r9),%xmm2
|
||||
// ZF here means the probed scratch bytes contain a NUL: exit via label 4
jz 4b
|
||||
jmp 7b
|
||||
.endm
|
|
@ -1,26 +0,0 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/pcmpstr.inc"
|
||||
#include "libc/nexgen32e/strstr.inc"
|
||||
|
||||
// TODO(jart): Fix me.
|
||||
// Body is the .strstr macro instantiated with .Lequalorder16 as its
// pcmpistri control byte.
// NOTE(review): presumably the 16-bit-character counterpart of the byte
// version — confirm against pcmpstr.inc.
strstr16$sse42:
|
||||
.strstr .Lequalorder16
|
||||
.endfn strstr16$sse42,globl,hidden
|
|
@ -70,9 +70,7 @@ noasan static inline const unsigned char *memchr_sse(const unsigned char *s,
|
|||
void *memchr(const void *s, int c, size_t n) {
|
||||
const void *r;
|
||||
if (!IsTiny() && X86_HAVE(SSE)) {
|
||||
if (IsAsan()) {
|
||||
__asan_verify(s, n);
|
||||
}
|
||||
if (IsAsan()) __asan_verify(s, n);
|
||||
r = memchr_sse(s, c, n);
|
||||
} else {
|
||||
r = memchr_pure(s, c, n);
|
||||
|
|
|
@ -68,9 +68,7 @@ noasan static inline const unsigned char *memrchr_sse(const unsigned char *s,
|
|||
void *memrchr(const void *s, int c, size_t n) {
|
||||
const void *r;
|
||||
if (!IsTiny() && X86_HAVE(SSE)) {
|
||||
if (IsAsan()) {
|
||||
__asan_verify(s, n);
|
||||
}
|
||||
if (IsAsan()) __asan_verify(s, n);
|
||||
r = memrchr_sse(s, c, n);
|
||||
} else {
|
||||
r = memrchr_pure(s, c, n);
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
|
@ -16,46 +16,61 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/str/internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
#define kVectorSize 32 /* x86+avx2 is 256-bit cpu */
|
||||
typedef char16_t xmm_t __attribute__((__vector_size__(16), __aligned__(2)));
|
||||
|
||||
typedef uint8_t uint8_v _Vector_size(kVectorSize);
|
||||
typedef uint32_t vbitmask_t;
|
||||
// Portable fallback used by memrchr16(): scans from s[n-1] down to s[0]
// and returns the address of the first match it meets, i.e. the highest
// index holding c. Returns 0 when c is absent, including when n == 0.
static inline const char16_t *memrchr16_pure(const char16_t *s, char16_t c,
|
||||
size_t n) {
|
||||
size_t i;
|
||||
// `i--` tests before decrementing, so the body sees n-1, n-2, ..., 0
for (i = n; i--;) {
|
||||
if (s[i] == c) {
|
||||
return s + i;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Vector variant used by memrchr16(): walks the buffer backwards eight
// char16_t (16 bytes) per step, then checks the remaining leading
// elements (fewer than eight) one at a time. Returns the address of the
// highest-indexed element equal to c, or 0 if there is none.
noasan static inline const char16_t *memrchr16_sse(const char16_t *s,
|
||||
char16_t c, size_t n) {
|
||||
size_t i;
|
||||
// NOTE(review): `k` is not referenced anywhere in this function
unsigned k, m;
|
||||
// t holds c broadcast into all eight 16-bit lanes
xmm_t v, t = {c, c, c, c, c, c, c, c};
|
||||
for (i = n; i >= 8;) {
|
||||
// xmm_t is declared __aligned__(2), so s + i needn't be 16-byte aligned
v = *(const xmm_t *)(s + (i -= 8));
|
||||
// one mask bit per byte of the lane-wise comparison result, so each
// matching 16-bit lane contributes two adjacent set bits
m = __builtin_ia32_pmovmskb128(v == t);
|
||||
if (m) {
|
||||
// clz ^ (bit width - 1) is the index of the highest set bit,
// which is the last matching byte in this chunk
m = __builtin_clzl(m) ^ (sizeof(long) * CHAR_BIT - 1);
|
||||
// byte index -> char16_t index
return s + i + m / 2;
|
||||
}
|
||||
}
|
||||
// scalar pass over s[i-1] .. s[0]
while (i--) {
|
||||
if (s[i] == c) {
|
||||
return s + i;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns how many bytes the utf16 string would be as utf8.
|
||||
* Returns pointer to last instance of character.
|
||||
*
|
||||
* @param s is memory to search
|
||||
* @param c is search character which is masked with 65535
|
||||
* @param n is number of char16_t elements in `s`
|
||||
* @return is pointer to last instance of c or NULL if not found
|
||||
* @asyncsignalsafe
|
||||
*/
|
||||
int strcmp_avx2(const char *s1, const char *s2) {
|
||||
if (s1 == s2) return 0;
|
||||
const unsigned char *p1 = (const unsigned char *)s1;
|
||||
const unsigned char *p2 = (const unsigned char *)s2;
|
||||
size_t i = -kVectorSize;
|
||||
vLoop:
|
||||
i += kVectorSize;
|
||||
bLoop:
|
||||
if (!IsPointerDangerous(p1 + i) && !IsPointerDangerous(p2 + i)) {
|
||||
unsigned char zf;
|
||||
vbitmask_t r1;
|
||||
uint8_v v1, v2;
|
||||
const uint8_v kZero = {0};
|
||||
asm(ZFLAG_ASM("vmovdqu\t%5,%2\n\t" /* move because gcc problematic */
|
||||
"vpcmpeqb\t%4,%2,%1\n\t" /* check for equality in p1 and p2 */
|
||||
"vpcmpeqb\t%6,%2,%2\n\t" /* check for nul in p1 */
|
||||
"vpandn\t%7,%1,%2\n\t" /* most complicated bitwise not ever */
|
||||
"vpor\t%2,%1,%1\n\t" /* check for nul in p2 */
|
||||
"pmovmskb\t%1,%3\n\t" /* turn 256 bits into 32 bits */
|
||||
"bsf\t%3,%3") /* find stop byte */
|
||||
: ZFLAG_CONSTRAINT(zf), "=x"(v1), "=x"(v2), "=r"(r1)
|
||||
: "m"(*(const uint8_v *)(p1 + i)), "m"(*(const uint8_v *)(p2 + i)),
|
||||
"x"(kZero), "m"(kVectorSize));
|
||||
if (zf) goto vLoop;
|
||||
return p1[i + r1] - p2[i + r1];
|
||||
void *memrchr16(const void *s, int c, size_t n) {
|
||||
const void *r;
|
||||
if (!IsTiny() && X86_HAVE(SSE)) {
|
||||
if (IsAsan()) __asan_verify(s, n * 2);
|
||||
r = memrchr16_sse(s, c, n);
|
||||
} else {
|
||||
i += 1;
|
||||
int c;
|
||||
if (!(c = p1[i - 1] - p2[i - 1]) && p1[i - 1] + p1[i - 1] != 0) goto bLoop;
|
||||
return c;
|
||||
r = memrchr16_pure(s, c, n);
|
||||
}
|
||||
return (void *)r;
|
||||
}
|
|
@ -88,7 +88,6 @@ void *memmove(void *, const void *, size_t) memcpyesque;
|
|||
void *memcpy(void *restrict, const void *restrict, size_t) memcpyesque;
|
||||
void *mempcpy(void *restrict, const void *restrict, size_t) memcpyesque;
|
||||
void *memccpy(void *restrict, const void *restrict, int, size_t) memcpyesque;
|
||||
void *memeqmask(void *, const void *, const void *, size_t) memcpyesque;
|
||||
void bcopy(const void *, void *, size_t) memcpyesque;
|
||||
void explicit_bzero(void *, size_t);
|
||||
|
||||
|
@ -173,7 +172,6 @@ wchar_t *wcsncat(wchar_t *, const wchar_t *, size_t) memcpyesque;
|
|||
char *strncpy(char *, const char *, size_t) memcpyesque;
|
||||
char *strtok(char *, const char *) paramsnonnull((2)) libcesque;
|
||||
char *strtok_r(char *, const char *, char **) paramsnonnull((2, 3));
|
||||
uint16_t *strcpyzbw(uint16_t *, const char *) memcpyesque;
|
||||
wchar_t *wcstok(wchar_t *, const wchar_t *, wchar_t **) paramsnonnull((2, 3));
|
||||
char *wstrtrunc(uint16_t *) memcpyesque;
|
||||
char *wstrntrunc(uint16_t *, size_t) memcpyesque;
|
||||
|
|
|
@ -20,10 +20,7 @@
|
|||
#include "libc/str/str.h"
|
||||
|
||||
static inline noasan uint64_t UncheckedAlignedRead64(const char *p) {
|
||||
return (uint64_t)(255 & p[7]) << 070 | (uint64_t)(255 & p[6]) << 060 |
|
||||
(uint64_t)(255 & p[5]) << 050 | (uint64_t)(255 & p[4]) << 040 |
|
||||
(uint64_t)(255 & p[3]) << 030 | (uint64_t)(255 & p[2]) << 020 |
|
||||
(uint64_t)(255 & p[1]) << 010 | (uint64_t)(255 & p[0]) << 000;
|
||||
return *(uint64_t *)p;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -18,12 +18,14 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
static noasan size_t strnlen_x64(const char *s, size_t n, size_t i) {
|
||||
uint64_t w;
|
||||
for (; i + 8 < n; i += 8) {
|
||||
w = READ64LE(s + i);
|
||||
w = *(uint64_t *)(s + i);
|
||||
if ((w = ~w & (w - 0x0101010101010101) & 0x8080808080808080)) {
|
||||
i += (unsigned)__builtin_ctzll(w) >> 3;
|
||||
break;
|
||||
|
@ -40,8 +42,9 @@ static noasan size_t strnlen_x64(const char *s, size_t n, size_t i) {
|
|||
* @return byte length
|
||||
* @asyncsignalsafe
|
||||
*/
|
||||
size_t strnlen(const char *s, size_t n) {
|
||||
noasan size_t strnlen(const char *s, size_t n) {
|
||||
size_t i;
|
||||
if (IsAsan() && n) __asan_verify(s, 1);
|
||||
for (i = 0; (uintptr_t)(s + i) & 7; ++i) {
|
||||
if (i == n || !s[i]) return i;
|
||||
}
|
||||
|
@ -50,5 +53,6 @@ size_t strnlen(const char *s, size_t n) {
|
|||
if (i == n || !s[i]) break;
|
||||
}
|
||||
assert(i == n || (i < n && !s[i]));
|
||||
if (IsAsan()) __asan_verify(s, i);
|
||||
return i;
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
|
@ -16,32 +16,46 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/nexgen32e/nexgen32e.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
TEST(sidiv, smoke) {
|
||||
EXPECT_EQ(13373133731337 / 10, div10int64(13373133731337));
|
||||
EXPECT_EQ(13373133731337 / 100, div100int64(13373133731337));
|
||||
EXPECT_EQ(13373133731337 / 1000, div1000int64(13373133731337));
|
||||
EXPECT_EQ(13373133731337 / 10000, div10000int64(13373133731337));
|
||||
EXPECT_EQ(13373133731337 / 1000000, div1000000int64(13373133731337));
|
||||
EXPECT_EQ(13373133731337 / 1000000000, div1000000000int64(13373133731337));
|
||||
// Word-at-a-time stage of strnlen_s(): starting at index i, reads eight
// bytes per step while i + 8 < n, and stops at the first word that
// contains a zero byte. Returns the index of that zero byte, or else
// the index at which the word loop gave up; the caller finishes the
// scan byte by byte.
static noasan size_t strnlen_s_x64(const char *s, size_t n, size_t i) {
|
||||
uint64_t w;
|
||||
for (; i + 8 < n; i += 8) {
|
||||
// strnlen_s() only calls this once s + i is 8-byte aligned
w = *(uint64_t *)(s + i);
|
||||
// nonzero iff some byte of w is zero; the lowest set bit falls in
// the first such byte
if ((w = ~w & (w - 0x0101010101010101) & 0x8080808080808080)) {
|
||||
// bit position -> byte offset (treats the low-order byte as the
// lowest address, i.e. little-endian)
i += (unsigned)__builtin_ctzll(w) >> 3;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
TEST(sirem, smoke) {
|
||||
EXPECT_EQ(13373133731337 % 10, rem10int64(13373133731337));
|
||||
EXPECT_EQ(13373133731337 % 100, rem100int64(13373133731337));
|
||||
EXPECT_EQ(13373133731337 % 1000, rem1000int64(13373133731337));
|
||||
EXPECT_EQ(13373133731337 % 10000, rem10000int64(13373133731337));
|
||||
EXPECT_EQ(13373133731337 % 1000000, rem1000000int64(13373133731337));
|
||||
EXPECT_EQ(13373133731337 % 1000000000, rem1000000000int64(13373133731337));
|
||||
}
|
||||
|
||||
TEST(rem, euclid) {
|
||||
ASSERT_EQ(-2, rem10int64(-12));
|
||||
ASSERT_EQ(-1, rem10int64(-1));
|
||||
ASSERT_EQ(0, rem10int64(0));
|
||||
ASSERT_EQ(1, rem10int64(1));
|
||||
ASSERT_EQ(9, rem10int64(9));
|
||||
ASSERT_EQ(1, rem10int64(11));
|
||||
/**
|
||||
* Returns length of NUL-terminated string... securely.
|
||||
*
|
||||
* This is like strnlen() except it'll return 0 if `s` is null. We also
|
||||
* make the assumption for the purposes of ASAN that `n` is the size of
|
||||
* the buffer if `s` is non-null.
|
||||
*
|
||||
* @param s is string
|
||||
* @param n is max length
|
||||
* @return byte length
|
||||
* @asyncsignalsafe
|
||||
*/
|
||||
noasan size_t strnlen_s(const char *s, size_t n) {
|
||||
size_t i;
|
||||
// a null pointer yields length 0 instead of being dereferenced
if (!s) return 0;
|
||||
// NOTE(review): presumably validates all n bytes at s up front, since
// this function is itself noasan — confirm against asan.internal.h
if (IsAsan()) __asan_verify(s, n);
|
||||
// byte-wise until s + i is 8-byte aligned (or the answer is found)
for (i = 0; (uintptr_t)(s + i) & 7; ++i) {
|
||||
if (i == n || !s[i]) return i;
|
||||
}
|
||||
// aligned 8-byte strides
i = strnlen_s_x64(s, n, i);
|
||||
// finish byte-wise: stops immediately if the helper landed on a NUL
for (;; ++i) {
|
||||
if (i == n || !s[i]) break;
|
||||
}
|
||||
assert(i == n || (i < n && !s[i]));
|
||||
return i;
|
||||
}
|
76
libc/str/wmemrchr.c
Normal file
76
libc/str/wmemrchr.c
Normal file
|
@ -0,0 +1,76 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2021 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/str/str.h"
|
||||
|
||||
typedef wchar_t xmm_t __attribute__((__vector_size__(16), __aligned__(4)));
|
||||
|
||||
// Portable fallback used by wmemrchr(): scans from s[n-1] down to s[0]
// and returns the address of the first match it meets, i.e. the highest
// index holding c. Returns 0 when c is absent, including when n == 0.
static inline const wchar_t *wmemrchr_pure(const wchar_t *s, wchar_t c,
|
||||
size_t n) {
|
||||
size_t i;
|
||||
// `i--` tests before decrementing, so the body sees n-1, n-2, ..., 0
for (i = n; i--;) {
|
||||
if (s[i] == c) {
|
||||
return s + i;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Vector variant used by wmemrchr(): walks the buffer backwards four
// wchar_t (16 bytes) per step, then checks the remaining leading
// elements (fewer than four) one at a time. Returns the address of the
// highest-indexed element equal to c, or 0 if there is none.
noasan static inline const wchar_t *wmemrchr_sse(const wchar_t *s, wchar_t c,
|
||||
size_t n) {
|
||||
size_t i;
|
||||
// NOTE(review): `k` is not referenced anywhere in this function
unsigned k, m;
|
||||
// t holds c broadcast into all four lanes
xmm_t v, t = {c, c, c, c};
|
||||
for (i = n; i >= 4;) {
|
||||
// xmm_t is declared __aligned__(4), so s + i needn't be 16-byte aligned
v = *(const xmm_t *)(s + (i -= 4));
|
||||
// one mask bit per byte of the lane-wise comparison result, so each
// matching lane contributes four adjacent set bits
m = __builtin_ia32_pmovmskb128(v == t);
|
||||
if (m) {
|
||||
// clz ^ (bit width - 1) is the index of the highest set bit,
// which is the last matching byte in this chunk
m = __builtin_clzl(m) ^ (sizeof(long) * CHAR_BIT - 1);
|
||||
// byte index -> wchar_t index
return s + i + m / 4;
|
||||
}
|
||||
}
|
||||
// scalar pass over s[i-1] .. s[0]
while (i--) {
|
||||
if (s[i] == c) {
|
||||
return s + i;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns pointer to last instance of character.
|
||||
*
|
||||
* @param s is memory to search
|
||||
* @param c is search word
|
||||
* @param n is number of wchar_t elements in `s`
|
||||
* @return is pointer to last instance of c or NULL if not found
|
||||
* @asyncsignalsafe
|
||||
*/
|
||||
void *wmemrchr(const void *s, wchar_t c, size_t n) {
|
||||
const void *r;
|
||||
// vector path unless this is a tiny build or the SSE check fails
if (!IsTiny() && X86_HAVE(SSE)) {
|
||||
// n counts wchar_t elements; n * 4 is the length in bytes. The SSE
// helper is noasan, so the range is checked here instead.
if (IsAsan()) __asan_verify(s, n * 4);
|
||||
r = wmemrchr_sse(s, c, n);
|
||||
} else {
|
||||
r = wmemrchr_pure(s, c, n);
|
||||
}
|
||||
// const-in / non-const-out, hence the cast
return (void *)r;
|
||||
}
|
|
@ -29,6 +29,7 @@
|
|||
#include "libc/sysv/consts/clone.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
#include "libc/thread/internal.h"
|
||||
#include "libc/thread/thread.h"
|
||||
|
||||
STATIC_YOINK("_main_thread_ctor");
|
||||
|
@ -77,6 +78,7 @@ static int cthread_start(void *arg) {
|
|||
exitcode = (void *)rc.dx;
|
||||
}
|
||||
td->exitcode = exitcode;
|
||||
_pthread_key_destruct(td->key);
|
||||
if (atomic_load(&td->state) & cthread_detached) {
|
||||
// we're still using the stack
|
||||
// thus we can't munmap it yet
|
||||
|
|
|
@ -54,7 +54,6 @@ int cthread_join(cthread_t td, void **exitcode) {
|
|||
} else {
|
||||
if (~atomic_fetch_add(&td->state, cthread_joining) & cthread_finished) {
|
||||
while ((x = atomic_load(&td->tid))) {
|
||||
// FUTEX_WAIT_PRIVATE makes it hang
|
||||
cthread_memory_wait32(&td->tid, x, 0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ StartOver:
|
|||
x = _pthread_key_usage[i];
|
||||
while (x) {
|
||||
j = bsrl(x);
|
||||
if ((dtor = _pthread_key_dtor[i * 64 + j]) && (value = key[i * 64 + j])) {
|
||||
if ((value = key[i * 64 + j]) && (dtor = _pthread_key_dtor[i * 64 + j])) {
|
||||
key[i * 64 + j] = 0;
|
||||
dtor(value);
|
||||
goto StartOver;
|
||||
|
|
|
@ -18,53 +18,25 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/atomic.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/struct/timespec.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/sysv/consts/clock.h"
|
||||
#include "libc/sysv/consts/futex.h"
|
||||
#include "libc/thread/freebsd.internal.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/futex.internal.h"
|
||||
#include "libc/thread/thread.h"
|
||||
|
||||
int cthread_memory_wait32(int* addr, int val, const struct timespec* timeout) {
|
||||
size_t size;
|
||||
struct _umtx_time *put, ut;
|
||||
if (IsLinux() || IsOpenbsd()) {
|
||||
return sys_futex(addr, FUTEX_WAIT, val, timeout, 0);
|
||||
|
||||
#if 0
|
||||
} else if (IsFreebsd()) {
|
||||
if (!timeout) {
|
||||
put = 0;
|
||||
size = 0;
|
||||
} else {
|
||||
ut._flags = 0;
|
||||
ut._clockid = CLOCK_REALTIME;
|
||||
ut._timeout = *timeout;
|
||||
put = &ut;
|
||||
size = sizeof(ut);
|
||||
}
|
||||
return _umtx_op(addr, UMTX_OP_MUTEX_WAIT, 0, &size, put);
|
||||
#endif
|
||||
|
||||
return _futex_wait(addr, val, timeout);
|
||||
} else {
|
||||
unsigned tries;
|
||||
for (tries = 1; atomic_load(addr) == val; ++tries) {
|
||||
if (tries & 7) {
|
||||
__builtin_ia32_pause();
|
||||
} else {
|
||||
sched_yield();
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return sched_yield();
|
||||
}
|
||||
}
|
||||
|
||||
int cthread_memory_wake32(int* addr, int n) {
|
||||
if (IsLinux() || IsOpenbsd()) {
|
||||
return sys_futex(addr, FUTEX_WAKE, n, 0, 0);
|
||||
#if 0
|
||||
} else if (IsFreebsd()) {
|
||||
return _umtx_op(addr, UMTX_OP_MUTEX_WAKE, n, 0, 0);
|
||||
#endif
|
||||
return _futex_wake(addr, n);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -41,6 +41,7 @@ void cthread_zombies_add(cthread_t td) {
|
|||
|
||||
void cthread_zombies_reap(void) {
|
||||
struct Zombie *z;
|
||||
// TODO(jart): Is this right? Update to not use malloc/free?
|
||||
while ((z = atomic_load(&cthread_zombies)) && !atomic_load(&z->td->tid)) {
|
||||
if (atomic_compare_exchange_weak(&cthread_zombies, &z, z->next)) {
|
||||
munmap(z->td->alloc.bottom, z->td->alloc.top - z->td->alloc.bottom);
|
||||
|
|
|
@ -28,7 +28,7 @@ long double dsleep(long double secs) {
|
|||
struct timespec dur, rem;
|
||||
dur.tv_sec = secs;
|
||||
dur.tv_nsec = secs * 1e9;
|
||||
dur.tv_nsec = rem1000000000int64(dur.tv_nsec);
|
||||
dur.tv_nsec = dur.tv_nsec % 1000000000;
|
||||
if (secs > 1e-6) {
|
||||
nanosleep(&dur, &rem);
|
||||
secs = rem.tv_nsec;
|
||||
|
|
|
@ -33,8 +33,23 @@
|
|||
#include "libc/testlib/testlib.h"
|
||||
#include "tool/net/sandbox.h"
|
||||
|
||||
// It's been reported that Chromebooks return EINVAL here.
|
||||
// Probes, in a forked child, whether seccomp(SECCOMP_SET_MODE_STRICT)
// succeeds on this system. Returns true only if the child exited
// normally with status 0.
bool CanUseSeccomp(void) {
|
||||
int ws, pid;
|
||||
ASSERT_NE(-1, (pid = fork()));
|
||||
if (!pid) {
|
||||
// child: report the outcome through the exit status.
// NOTE(review): presumably _Exit1 is used because it remains
// permitted once strict mode is on — confirm.
if (seccomp(SECCOMP_SET_MODE_STRICT, 0, 0) != -1) {
|
||||
_Exit1(0);
|
||||
} else {
|
||||
_Exit1(1);
|
||||
}
|
||||
}
|
||||
// parent: reap the child and decode its status
EXPECT_NE(-1, wait(&ws));
|
||||
return WIFEXITED(ws) && !WEXITSTATUS(ws);
|
||||
}
|
||||
|
||||
void SetUp(void) {
|
||||
if (!__is_linux_2_6_23()) {
|
||||
if (!__is_linux_2_6_23() || !CanUseSeccomp()) {
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -131,7 +131,7 @@ TEST(pthread_mutex_lock, contention) {
|
|||
for (i = 0; i < THREADS; ++i) {
|
||||
munmap(stack[i], GetStackSize());
|
||||
}
|
||||
pthread_mutex_destroy(&lock);
|
||||
EXPECT_EQ(0, pthread_mutex_destroy(&lock));
|
||||
}
|
||||
|
||||
TEST(pthread_mutex_lock, rcontention) {
|
||||
|
@ -159,7 +159,7 @@ TEST(pthread_mutex_lock, rcontention) {
|
|||
for (i = 0; i < THREADS; ++i) {
|
||||
munmap(stack[i], GetStackSize());
|
||||
}
|
||||
pthread_mutex_destroy(&lock);
|
||||
EXPECT_EQ(0, pthread_mutex_destroy(&lock));
|
||||
}
|
||||
|
||||
TEST(pthread_mutex_lock, econtention) {
|
||||
|
@ -187,7 +187,7 @@ TEST(pthread_mutex_lock, econtention) {
|
|||
for (i = 0; i < THREADS; ++i) {
|
||||
munmap(stack[i], GetStackSize());
|
||||
}
|
||||
pthread_mutex_destroy(&lock);
|
||||
EXPECT_EQ(0, pthread_mutex_destroy(&lock));
|
||||
}
|
||||
|
||||
int SpinlockWorker(void *p) {
|
||||
|
|
|
@ -106,9 +106,9 @@ TEST(strnlen, nulNotFound_ReturnsSize) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST(strnlen_s, nulNotFound_ReturnsZero) {
|
||||
TEST(strnlen_s, nulNotFound) {
|
||||
char buf[3] = {1, 2, 3};
|
||||
ASSERT_EQ(0, strnlen_s(buf, 3));
|
||||
ASSERT_EQ(3, strnlen_s(buf, 3));
|
||||
}
|
||||
|
||||
TEST(strlen, fuzz) {
|
|
@ -1,101 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/runtime/buffer.h"
|
||||
#include "libc/runtime/gc.internal.h"
|
||||
#include "libc/stdio/stdio.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
#include "libc/x/x.h"
|
||||
|
||||
#define ALIGN 128
|
||||
#define BUFSIZE (8 * 32)
|
||||
#define MASKSIZE (BUFSIZE / CHAR_BIT)
|
||||
|
||||
const char kX[] = "aaaaaaaaeeeeeeeeeeeeeeeeeeeeeeee"
|
||||
"e e"
|
||||
"e e"
|
||||
"e e"
|
||||
"e e"
|
||||
"e e"
|
||||
"e e"
|
||||
"eeeeeeeeeeeeeeeeeeeeeeeeeeeeee-e";
|
||||
|
||||
const char kY[] = "aaaaaaaaefffffffffffeffffffffff-"
|
||||
"f z-"
|
||||
"f f"
|
||||
"f f"
|
||||
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
"f f"
|
||||
"f f"
|
||||
"ffffffffffffffffffffffffffffff-f";
|
||||
|
||||
const char kM[] = "11111111100000000000100000000000"
|
||||
"01111111111111111111111111111100"
|
||||
"01111111111111111111111111111110"
|
||||
"01111111111111111111111111111110"
|
||||
"00000000000000000000000000000000"
|
||||
"01111111111111111111111111111110"
|
||||
"01111111111111111111111111111110"
|
||||
"00000000000000000000000000000010";
|
||||
|
||||
// Renders `size` bytes of `data` as a NUL-terminated string of '0' and
// '1' characters, CHAR_BIT characters per byte, least-significant bit
// of each byte first. The string comes from xmalloc().
// NOTE(review): the caller presumably owns and frees the result — the
// one call site here wraps it in gc().
dontdiscard char *binify(uint8_t *data, size_t size) {
|
||||
uint8_t b;
|
||||
size_t i, j;
|
||||
char *s, *p;
|
||||
// one output character per bit, plus the terminator
p = s = xmalloc(size * CHAR_BIT + 1);
|
||||
for (i = 0; i < size; ++i) {
|
||||
b = data[i];
|
||||
for (j = 0; j < CHAR_BIT; ++j) {
|
||||
// emit the low bit, then shift the next one into place
*p++ = "01"[b & 1];
|
||||
b >>= 1;
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
return s;
|
||||
}
|
||||
|
||||
// Copies kX and kY into balloc()'d buffers, runs memeqmask over them,
// and checks two things: the call returns its destination pointer, and
// the mask it wrote, rendered by binify(), equals kM.
TEST(memeqmask, test) {
|
||||
struct GuardedBuffer x = {}, y = {}, m = {};
|
||||
memcpy(balloc(&x, ALIGN, BUFSIZE), kX, BUFSIZE);
|
||||
memcpy(balloc(&y, ALIGN, BUFSIZE), kY, BUFSIZE);
|
||||
// the mask needs one bit per input byte, hence MASKSIZE
balloc(&m, ALIGN, MASKSIZE);
|
||||
EXPECT_EQ((intptr_t)m.p, (intptr_t)memeqmask(m.p, x.p, y.p, BUFSIZE));
|
||||
EXPECT_STREQ(kM, gc(binify(m.p, MASKSIZE)));
|
||||
bfree(&m);
|
||||
bfree(&x);
|
||||
bfree(&y);
|
||||
}
|
||||
|
||||
#if 0
|
||||
#include "libc/rand/rand.h"
|
||||
#include "libc/testlib/ezbench.h"
|
||||
TEST(memeqmask, bench) {
|
||||
size_t len = 64 * 1024;
|
||||
char *m = xmemalign(64, DIMMASK(len));
|
||||
char *x = xmemalign(64, len);
|
||||
char *y = xmemalign(64, len);
|
||||
EZBENCH(
|
||||
{
|
||||
rngset(x, len, rand64, -1);
|
||||
rngset(y, len, rand64, -1);
|
||||
},
|
||||
memeqmask(m, x, y, len));
|
||||
}
|
||||
#endif
|
|
@ -19,6 +19,7 @@
|
|||
#include "libc/nexgen32e/nexgen32e.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/testlib/ezbench.h"
|
||||
#include "libc/testlib/hyperion.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
|
||||
TEST(memrchr16, test) {
|
||||
|
@ -31,4 +32,6 @@ TEST(memrchr16, test) {
|
|||
BENCH(memrchr16, bench) {
|
||||
EZBENCH2("memrchr16", donothing,
|
||||
EXPROPRIATE(memrchr16(u"yo.hi.there", '.', 11)));
|
||||
EZBENCH2("memrchr16 hyperion", donothing,
|
||||
EXPROPRIATE(memrchr16(kHyperion, '.', kHyperionSize / 2)));
|
||||
}
|
0
test/libc/str/strnlen_test.c
Normal file
0
test/libc/str/strnlen_test.c
Normal file
|
@ -1,7 +1,7 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
|
@ -16,40 +16,21 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/testlib/ezbench.h"
|
||||
#include "libc/testlib/hyperion.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
|
||||
unsigned cescapec(int c) {
|
||||
unsigned char ch = c;
|
||||
switch (ch) {
|
||||
case '\a':
|
||||
return '\\' | 'a' << 8;
|
||||
case '\b':
|
||||
return '\\' | 'b' << 8;
|
||||
case '\v':
|
||||
return '\\' | 'v' << 8;
|
||||
case '\f':
|
||||
return '\\' | 'f' << 8;
|
||||
case '\?':
|
||||
return '\\' | '?' << 8;
|
||||
case '\n':
|
||||
return '\\' | 'n' << 8;
|
||||
case '\r':
|
||||
return '\\' | 'r' << 8;
|
||||
case '\t':
|
||||
return '\\' | 't' << 8;
|
||||
case '\"':
|
||||
return '\\' | '"' << 8;
|
||||
case '\'':
|
||||
return '\\' | '\'' << 8;
|
||||
case '\\':
|
||||
return '\\' | '\\' << 8;
|
||||
default: {
|
||||
if (ch >= 0x80 || !isprint(ch)) {
|
||||
return '\\' | (ch / 64 + '0') << 8 | (ch % 64 / 8 + '0') << 16 |
|
||||
(ch % 8 + '0') << 24;
|
||||
} else {
|
||||
return ch;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Checks wmemrchr on inputs that contain two '.' characters: the result
// must point at the later one, and an absent character must give NULL.
TEST(wmemrchr, test) {
|
||||
// '-' does not occur in the input
EXPECT_EQ(NULL, wmemrchr(L"yo.hi.thereeuhcruhrceeuhcre", '-', 27));
|
||||
// dots at index 2 and 5; the match must be the one at index 5
EXPECT_STREQ(L".there", wmemrchr(L"yo.hi.there", '.', 11));
|
||||
// 27 elements, so the match lies well before the end of the buffer
EXPECT_STREQ(L".thereeuhcruhrceeuhcre",
|
||||
wmemrchr(L"yo.hi.thereeuhcruhrceeuhcre", '.', 27));
|
||||
}
|
||||
|
||||
// Times wmemrchr on a short literal and on the kHyperion corpus.
BENCH(wmemrchr, bench) {
|
||||
EZBENCH2("wmemrchr", donothing,
|
||||
EXPROPRIATE(wmemrchr(L"yo.hi.there", '.', 11)));
|
||||
// the length is given in wchar_t elements, hence kHyperionSize / 4
// NOTE(review): presumably kHyperion is a byte array — confirm
EZBENCH2("wmemrchr hyperion", donothing,
|
||||
EXPROPRIATE(wmemrchr(kHyperion, '.', kHyperionSize / 4)));
|
||||
}
|
14
third_party/stb/stb_image_write.c
vendored
14
third_party/stb/stb_image_write.c
vendored
|
@ -723,12 +723,14 @@ static int stbi_write_jpg_core(stbi__write_context *s, int width, int height,
|
|||
quality = quality < 50 ? 5000 / quality : 200 - quality * 2;
|
||||
|
||||
for (i = 0; i < 64; ++i) {
|
||||
int uvti, yti = div100int64((YQT[i] * quality + 50));
|
||||
YTable[stbiw__jpg_ZigZag[i]] =
|
||||
(unsigned char)(yti < 1 ? 1 : yti > 255 ? 255 : yti);
|
||||
uvti = div100int64(UVQT[i] * quality + 50);
|
||||
UVTable[stbiw__jpg_ZigZag[i]] =
|
||||
(unsigned char)(uvti < 1 ? 1 : uvti > 255 ? 255 : uvti);
|
||||
int uvti, yti = (YQT[i] * quality + 50) / 100;
|
||||
YTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(yti < 1 ? 1
|
||||
: yti > 255 ? 255
|
||||
: yti);
|
||||
uvti = (UVQT[i] * quality + 50) / 100;
|
||||
UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char)(uvti < 1 ? 1
|
||||
: uvti > 255 ? 255
|
||||
: uvti);
|
||||
}
|
||||
|
||||
for (row = 0, k = 0; row < 8; ++row) {
|
||||
|
|
Loading…
Reference in a new issue