Make spinlocks faster (take two)

This change is green on the x86 and arm test fleets.
Justine Tunney 2024-07-26 00:44:45 -07:00
parent 02e1cbcd00
commit 59692b0882
14 changed files with 122 additions and 79 deletions
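
The common thread in these diffs is the spin strategy. The old loops
retried an atomic exchange or compare-and-swap back to back, so every
waiter kept pulling the cache line away from the lock holder. The new
loops use test-and-test-and-set: attempt one exclusive exchange, and on
failure spin on a plain relaxed load until the lock looks free before
trying again. A minimal standalone sketch of the pattern, not the
library's exact code:

#include <stdatomic.h>

static atomic_int lock;

void ttas_lock(void) {
  for (;;) {
    // attempt: one exclusive read-modify-write
    if (!atomic_exchange_explicit(&lock, 1, memory_order_acquire))
      return;  // acquired
    // wait: relaxed loads leave the cache line in the shared state
    // instead of bouncing it between waiting cores
    while (atomic_load_explicit(&lock, memory_order_relaxed)) {
    }
  }
}

void ttas_unlock(void) {
  atomic_store_explicit(&lock, 0, memory_order_release);
}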

View file

@@ -24,13 +24,13 @@
 #define N 160
 
-static bool IsDangerous(const void *ptr) {
+privileged static bool IsDangerous(const void *ptr) {
   if (_weaken(kisdangerous))
     return _weaken(kisdangerous)(ptr);
   return false;
 }
 
-static char *FormatHex(char *p, unsigned long x) {
+privileged static char *FormatHex(char *p, unsigned long x) {
   int k = x ? (__builtin_clzl(x) ^ 63) + 1 : 1;
   k = (k + 3) & -4;
   while (k > 0)
@@ -39,8 +39,8 @@ static char *FormatHex(char *p, unsigned long x) {
   return p;
 }
 
-dontinstrument const char *(DescribeBacktrace)(char buf[N],
-                                               const struct StackFrame *fr) {
+privileged dontinstrument const char *(
+    DescribeBacktrace)(char buf[N], const struct StackFrame *fr) {
   char *p = buf;
   char *pe = p + N;
   bool gotsome = false;

View file

@@ -20,7 +20,7 @@
 // returns true if `p` is preceded by x86 call instruction
 // this is actually impossible to do but we'll do our best
-dontinstrument int __is_call(const unsigned char *p) {
+privileged dontinstrument int __is_call(const unsigned char *p) {
   if (p[-5] == 0xe8)
     return 5;  // call Jvds
   if (p[-2] == 0xff && (p[-1] & 070) == 020)

View file

@@ -18,13 +18,17 @@
 */
+#include "libc/intrin/maps.h"
 #include "ape/sections.internal.h"
+#include "libc/calls/state.internal.h"
 #include "libc/dce.h"
 #include "libc/intrin/describebacktrace.h"
+#include "libc/intrin/dll.h"
 #include "libc/intrin/kprintf.h"
-#include "libc/intrin/maps.h"
 #include "libc/runtime/runtime.h"
 #include "libc/runtime/stack.h"
 #include "libc/sysv/consts/auxv.h"
 #include "libc/sysv/consts/prot.h"
+#include "libc/thread/lock.h"
 
 #ifdef __x86_64__
 __static_yoink("_init_maps");
@@ -85,37 +89,67 @@ void __maps_init(void) {
 }
 
 privileged bool __maps_lock(void) {
+  int me;
+  uint64_t word, lock;
   struct CosmoTib *tib;
   if (!__tls_enabled)
     return false;
-  tib = __get_tls_privileged();
-  if (atomic_fetch_add_explicit(&tib->tib_relock_maps, 1, memory_order_relaxed))
-    return true;
-  int backoff = 0;
-  while (atomic_exchange_explicit(&__maps.lock, 1, memory_order_acquire)) {
-    if (backoff < 7) {
-      volatile int i;
-      for (i = 0; i != 1 << backoff; i++) {
-      }
-      backoff++;
-    } else {
-      // STRACE("pthread_delay_np(__maps)");
-#if defined(__GNUC__) && defined(__aarch64__)
-      __asm__ volatile("yield");
-#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
-      __asm__ volatile("pause");
-#endif
+  if (!(tib = __get_tls_privileged()))
+    return false;
+  if (tib->tib_flags & TIB_FLAG_VFORKED)
+    return false;
+  me = atomic_load_explicit(&tib->tib_tid, memory_order_acquire);
+  if (me <= 0)
+    return false;
+  word = atomic_load_explicit(&__maps.lock, memory_order_relaxed);
+  for (;;) {
+    if (MUTEX_OWNER(word) == me) {
+      if (atomic_compare_exchange_weak_explicit(
+              &__maps.lock, &word, MUTEX_INC_DEPTH(word), memory_order_relaxed,
+              memory_order_relaxed))
+        return true;
+      continue;
+    }
+    word = 0;
+    lock = MUTEX_LOCK(word);
+    lock = MUTEX_SET_OWNER(lock, me);
+    if (atomic_compare_exchange_weak_explicit(&__maps.lock, &word, lock,
+                                              memory_order_acquire,
+                                              memory_order_relaxed))
+      return false;
+    for (;;) {
+      word = atomic_load_explicit(&__maps.lock, memory_order_relaxed);
+      if (MUTEX_OWNER(word) == me)
+        break;
+      if (!word)
+        break;
     }
   }
-  return false;
 }
 
 privileged void __maps_unlock(void) {
+  int me;
+  uint64_t word;
   struct CosmoTib *tib;
   if (!__tls_enabled)
     return;
-  tib = __get_tls_privileged();
-  if (atomic_fetch_sub_explicit(&tib->tib_relock_maps, 1,
-                                memory_order_relaxed) == 1)
-    atomic_store_explicit(&__maps.lock, 0, memory_order_release);
+  if (!(tib = __get_tls_privileged()))
+    return;
+  if (tib->tib_flags & TIB_FLAG_VFORKED)
+    return;
+  me = atomic_load_explicit(&tib->tib_tid, memory_order_acquire);
+  if (me <= 0)
+    return;
+  word = atomic_load_explicit(&__maps.lock, memory_order_relaxed);
+  for (;;) {
+    if (MUTEX_DEPTH(word)) {
+      if (atomic_compare_exchange_weak_explicit(
+              &__maps.lock, &word, MUTEX_DEC_DEPTH(word), memory_order_relaxed,
+              memory_order_relaxed))
+        break;
+    }
+    if (atomic_compare_exchange_weak_explicit(
+            &__maps.lock, &word, 0, memory_order_release, memory_order_relaxed))
+      break;
+  }
 }
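
__maps_lock now packs everything it needs into a single 64-bit word,
manipulated through the MUTEX_* macros from libc/thread/lock.h: an owner
tid lets the holder recognize re-entry, and a depth counter turns
recursive acquisition into a CAS that bumps the depth. The real field
layout lives in lock.h; what follows is a hypothetical encoding, purely
to illustrate how such a word can work:

#include <stdint.h>

// Hypothetical layout (illustrative only; the actual MUTEX_* macros in
// libc/thread/lock.h may pack these fields differently).
#define LOCK_BIT    1ull                            // bit 0: lock held
#define OWNER_SHIFT 1                               // bits 1..31: owner tid
#define OWNER_MASK  (0x7fffffffull << OWNER_SHIFT)
#define DEPTH_ONE   (1ull << 32)                    // bits 32+: depth

static inline uint64_t mutex_lock_word(uint64_t w) { return w | LOCK_BIT; }
static inline uint64_t mutex_set_owner(uint64_t w, int tid) {
  return (w & ~OWNER_MASK) | ((uint64_t)tid << OWNER_SHIFT);
}
static inline int mutex_owner(uint64_t w) {
  return (int)((w & OWNER_MASK) >> OWNER_SHIFT);
}
static inline uint64_t mutex_depth(uint64_t w)     { return w >> 32; }
static inline uint64_t mutex_inc_depth(uint64_t w) { return w + DEPTH_ONE; }
static inline uint64_t mutex_dec_depth(uint64_t w) { return w - DEPTH_ONE; }

Under a scheme like this, __maps_lock returning true means "already mine,
depth bumped" while false means a fresh acquisition (or that locking is
impossible right now, e.g. in a vforked child), which is why
__maps_unlock only release-stores zero once the depth has drained.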

View file

@@ -27,8 +27,8 @@ struct Map {
 };
 
 struct Maps {
-  atomic_int lock;
   struct Tree *maps;
+  _Atomic(uint64_t) lock;
   _Atomic(struct Map *) freed;
   size_t count;
   size_t pages;

View file

@@ -31,17 +31,16 @@
 #include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"
 
-static void pthread_mutex_lock_naive(pthread_mutex_t *mutex, uint64_t word) {
+static void pthread_mutex_lock_spin(atomic_int *word) {
   int backoff = 0;
-  uint64_t lock;
   for (;;) {
-    word = MUTEX_UNLOCK(word);
-    lock = MUTEX_LOCK(word);
-    if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
-                                              memory_order_acquire,
-                                              memory_order_relaxed))
-      return;
-    backoff = pthread_delay_np(mutex, backoff);
+    if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
+      break;
+    for (;;) {
+      if (!atomic_load_explicit(word, memory_order_relaxed))
+        break;
+      backoff = pthread_delay_np(word, backoff);
+    }
   }
 }
@@ -96,7 +95,14 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
       mutex->_pid = __pid;
       return 0;
     }
-    backoff = pthread_delay_np(mutex, backoff);
+    for (;;) {
+      word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
+      if (MUTEX_OWNER(word) == me)
+        break;
+      if (word == MUTEX_UNLOCK(word))
+        break;
+      backoff = pthread_delay_np(mutex, backoff);
+    }
   }
 }
@@ -121,7 +127,7 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
   if (_weaken(nsync_futex_wait_)) {
     pthread_mutex_lock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
   } else {
-    pthread_mutex_lock_naive(mutex, word);
+    pthread_mutex_lock_spin(&mutex->_futex);
   }
   return 0;
 }
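
The recursive path gets the same treatment: after a failed acquisition
CAS it now waits on relaxed loads until the word is either free or owned
by the calling thread, with pthread_delay_np supplying backoff. This is
the code path behind PTHREAD_MUTEX_RECURSIVE when nsync futexes aren't
linked; standard usage, for reference:

#include <pthread.h>

int main(void) {
  pthread_mutexattr_t attr;
  pthread_mutex_t mu;
  pthread_mutexattr_init(&attr);
  pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
  pthread_mutex_init(&mu, &attr);
  pthread_mutexattr_destroy(&attr);
  pthread_mutex_lock(&mu);
  pthread_mutex_lock(&mu);    // ok: same owner, depth becomes 2
  pthread_mutex_unlock(&mu);
  pthread_mutex_unlock(&mu);  // depth drains back to 0
  pthread_mutex_destroy(&mu);
  return 0;
}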

View file

@@ -27,14 +27,8 @@
 #include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"
 
-static errno_t pthread_mutex_trylock_naive(pthread_mutex_t *mutex,
-                                           uint64_t word) {
-  uint64_t lock;
-  word = MUTEX_UNLOCK(word);
-  lock = MUTEX_LOCK(word);
-  if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
-                                            memory_order_acquire,
-                                            memory_order_relaxed))
+static errno_t pthread_mutex_trylock_spin(atomic_int *word) {
+  if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
     return 0;
   return EBUSY;
 }
@@ -116,7 +110,7 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
   if (_weaken(nsync_futex_wait_)) {
     return pthread_mutex_trylock_drepper(&mutex->_futex);
   } else {
-    return pthread_mutex_trylock_naive(mutex, word);
+    return pthread_mutex_trylock_spin(&mutex->_futex);
   }
 }
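
Trylock's fallback shrinks to a single exchange: zero means the lock was
free and is now ours, anything else means EBUSY, with no spinning at all.
From the caller's side:

#include <errno.h>
#include <pthread.h>

int try_enter(pthread_mutex_t *mu) {
  if (pthread_mutex_trylock(mu) == EBUSY)
    return 0;  // held by someone else; go do something useful
  // ... critical section ...
  pthread_mutex_unlock(mu);
  return 1;
}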

View file

@@ -28,9 +28,8 @@
 #include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"
 
-static void pthread_mutex_unlock_naive(pthread_mutex_t *mutex, uint64_t word) {
-  uint64_t lock = MUTEX_UNLOCK(word);
-  atomic_store_explicit(&mutex->_word, lock, memory_order_release);
+static void pthread_mutex_unlock_spin(atomic_int *word) {
+  atomic_store_explicit(word, 0, memory_order_release);
 }
 
 // see "take 3" algorithm in "futexes are tricky" by ulrich drepper
@@ -102,7 +101,7 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
   if (_weaken(nsync_futex_wake_)) {
     pthread_mutex_unlock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
   } else {
-    pthread_mutex_unlock_naive(mutex, word);
+    pthread_mutex_unlock_spin(&mutex->_futex);
   }
   return 0;
 }
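
The spin flavor of unlock is just a release store of zero, which
publishes the critical section's writes to whichever waiter's acquire
exchange wins next. The _drepper functions on the futex path follow the
"take 3" algorithm cited above; here is a minimal sketch of it, with
hypothetical futex_wait/futex_wake wrappers standing in for the real
nsync_futex_* internals:

#include <stdatomic.h>

// hypothetical wrappers (assumed, not a portable API): block while
// *w == val, and wake up to count sleepers
void futex_wait(atomic_int *w, int val);
void futex_wake(atomic_int *w, int count);

// word states: 0 = free, 1 = locked, 2 = locked with possible waiters
void drepper_lock(atomic_int *m) {
  int c = 0;
  if (atomic_compare_exchange_strong(m, &c, 1))
    return;                     // uncontended fast path
  if (c != 2)
    c = atomic_exchange(m, 2);  // announce that we're going to sleep
  while (c != 0) {
    futex_wait(m, 2);           // sleep until the word changes
    c = atomic_exchange(m, 2);
  }
}

void drepper_unlock(atomic_int *m) {
  if (atomic_fetch_sub(m, 1) != 1) {  // state was 2: someone may sleep
    atomic_store(m, 0);
    futex_wake(m, 1);                 // pay for a wake only if needed
  }
}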

View file

@@ -38,8 +38,12 @@
  * @see pthread_spin_init
  */
 errno_t pthread_spin_lock(pthread_spinlock_t *spin) {
-  while (atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire)) {
-    pthread_pause_np();
+  for (;;) {
+    if (!atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire))
+      break;
+    for (;;)
+      if (!atomic_load_explicit(&spin->_lock, memory_order_relaxed))
+        break;
   }
   return 0;
 }
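
pthread_spin_lock itself becomes the canonical test-and-test-and-set
loop, with no pause or backoff in the wait loop at all, since spinlock
critical sections are expected to be very short. Typical usage:

#include <pthread.h>

pthread_spinlock_t lk;

void with_spinlock(void) {
  pthread_spin_init(&lk, PTHREAD_PROCESS_PRIVATE);
  pthread_spin_lock(&lk);    // spins in userspace; never sleeps
  // ... keep this short: other spinners are burning cpu ...
  pthread_spin_unlock(&lk);
  pthread_spin_destroy(&lk);
}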