mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-05 18:58:30 +00:00
Make spinlocks faster (take two)
This change is green on x86 and arm test fleet.
This commit is contained in:
parent
02e1cbcd00
commit
59692b0882
14 changed files with 122 additions and 79 deletions
|
@ -24,13 +24,13 @@
|
|||
|
||||
#define N 160
|
||||
|
||||
static bool IsDangerous(const void *ptr) {
|
||||
privileged static bool IsDangerous(const void *ptr) {
|
||||
if (_weaken(kisdangerous))
|
||||
return _weaken(kisdangerous)(ptr);
|
||||
return false;
|
||||
}
|
||||
|
||||
static char *FormatHex(char *p, unsigned long x) {
|
||||
privileged static char *FormatHex(char *p, unsigned long x) {
|
||||
int k = x ? (__builtin_clzl(x) ^ 63) + 1 : 1;
|
||||
k = (k + 3) & -4;
|
||||
while (k > 0)
|
||||
|
@ -39,8 +39,8 @@ static char *FormatHex(char *p, unsigned long x) {
|
|||
return p;
|
||||
}
|
||||
|
||||
dontinstrument const char *(DescribeBacktrace)(char buf[N],
|
||||
const struct StackFrame *fr) {
|
||||
privileged dontinstrument const char *(
|
||||
DescribeBacktrace)(char buf[N], const struct StackFrame *fr) {
|
||||
char *p = buf;
|
||||
char *pe = p + N;
|
||||
bool gotsome = false;
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
|
||||
// returns true if `p` is preceded by x86 call instruction
|
||||
// this is actually impossible to do but we'll do our best
|
||||
dontinstrument int __is_call(const unsigned char *p) {
|
||||
privileged dontinstrument int __is_call(const unsigned char *p) {
|
||||
if (p[-5] == 0xe8)
|
||||
return 5; // call Jvds
|
||||
if (p[-2] == 0xff && (p[-1] & 070) == 020)
|
||||
|
|
|
@ -18,13 +18,17 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/maps.h"
|
||||
#include "ape/sections.internal.h"
|
||||
#include "libc/calls/state.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/describebacktrace.h"
|
||||
#include "libc/intrin/dll.h"
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "libc/intrin/maps.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/runtime/stack.h"
|
||||
#include "libc/sysv/consts/auxv.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
#include "libc/thread/lock.h"
|
||||
|
||||
#ifdef __x86_64__
|
||||
__static_yoink("_init_maps");
|
||||
|
@ -85,37 +89,67 @@ void __maps_init(void) {
|
|||
}
|
||||
|
||||
privileged bool __maps_lock(void) {
|
||||
int me;
|
||||
uint64_t word, lock;
|
||||
struct CosmoTib *tib;
|
||||
if (!__tls_enabled)
|
||||
return false;
|
||||
tib = __get_tls_privileged();
|
||||
if (atomic_fetch_add_explicit(&tib->tib_relock_maps, 1, memory_order_relaxed))
|
||||
return true;
|
||||
int backoff = 0;
|
||||
while (atomic_exchange_explicit(&__maps.lock, 1, memory_order_acquire)) {
|
||||
if (backoff < 7) {
|
||||
volatile int i;
|
||||
for (i = 0; i != 1 << backoff; i++) {
|
||||
}
|
||||
backoff++;
|
||||
} else {
|
||||
// STRACE("pthread_delay_np(__maps)");
|
||||
#if defined(__GNUC__) && defined(__aarch64__)
|
||||
__asm__ volatile("yield");
|
||||
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
|
||||
__asm__ volatile("pause");
|
||||
#endif
|
||||
if (!(tib = __get_tls_privileged()))
|
||||
return false;
|
||||
if (tib->tib_flags & TIB_FLAG_VFORKED)
|
||||
return false;
|
||||
me = atomic_load_explicit(&tib->tib_tid, memory_order_acquire);
|
||||
if (me <= 0)
|
||||
return false;
|
||||
word = atomic_load_explicit(&__maps.lock, memory_order_relaxed);
|
||||
for (;;) {
|
||||
if (MUTEX_OWNER(word) == me) {
|
||||
if (atomic_compare_exchange_weak_explicit(
|
||||
&__maps.lock, &word, MUTEX_INC_DEPTH(word), memory_order_relaxed,
|
||||
memory_order_relaxed))
|
||||
return true;
|
||||
continue;
|
||||
}
|
||||
word = 0;
|
||||
lock = MUTEX_LOCK(word);
|
||||
lock = MUTEX_SET_OWNER(lock, me);
|
||||
if (atomic_compare_exchange_weak_explicit(&__maps.lock, &word, lock,
|
||||
memory_order_acquire,
|
||||
memory_order_relaxed))
|
||||
return false;
|
||||
for (;;) {
|
||||
word = atomic_load_explicit(&__maps.lock, memory_order_relaxed);
|
||||
if (MUTEX_OWNER(word) == me)
|
||||
break;
|
||||
if (!word)
|
||||
break;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
privileged void __maps_unlock(void) {
|
||||
int me;
|
||||
uint64_t word;
|
||||
struct CosmoTib *tib;
|
||||
if (!__tls_enabled)
|
||||
return;
|
||||
tib = __get_tls_privileged();
|
||||
if (atomic_fetch_sub_explicit(&tib->tib_relock_maps, 1,
|
||||
memory_order_relaxed) == 1)
|
||||
atomic_store_explicit(&__maps.lock, 0, memory_order_release);
|
||||
if (!(tib = __get_tls_privileged()))
|
||||
return;
|
||||
if (tib->tib_flags & TIB_FLAG_VFORKED)
|
||||
return;
|
||||
me = atomic_load_explicit(&tib->tib_tid, memory_order_acquire);
|
||||
if (me <= 0)
|
||||
return;
|
||||
word = atomic_load_explicit(&__maps.lock, memory_order_relaxed);
|
||||
for (;;) {
|
||||
if (MUTEX_DEPTH(word)) {
|
||||
if (atomic_compare_exchange_weak_explicit(
|
||||
&__maps.lock, &word, MUTEX_DEC_DEPTH(word), memory_order_relaxed,
|
||||
memory_order_relaxed))
|
||||
break;
|
||||
}
|
||||
if (atomic_compare_exchange_weak_explicit(
|
||||
&__maps.lock, &word, 0, memory_order_release, memory_order_relaxed))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,8 +27,8 @@ struct Map {
|
|||
};
|
||||
|
||||
struct Maps {
|
||||
atomic_int lock;
|
||||
struct Tree *maps;
|
||||
_Atomic(uint64_t) lock;
|
||||
_Atomic(struct Map *) freed;
|
||||
size_t count;
|
||||
size_t pages;
|
||||
|
|
|
@ -31,17 +31,16 @@
|
|||
#include "third_party/nsync/futex.internal.h"
|
||||
#include "third_party/nsync/mu.h"
|
||||
|
||||
static void pthread_mutex_lock_naive(pthread_mutex_t *mutex, uint64_t word) {
|
||||
static void pthread_mutex_lock_spin(atomic_int *word) {
|
||||
int backoff = 0;
|
||||
uint64_t lock;
|
||||
for (;;) {
|
||||
word = MUTEX_UNLOCK(word);
|
||||
lock = MUTEX_LOCK(word);
|
||||
if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
|
||||
memory_order_acquire,
|
||||
memory_order_relaxed))
|
||||
return;
|
||||
backoff = pthread_delay_np(mutex, backoff);
|
||||
if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
|
||||
break;
|
||||
for (;;) {
|
||||
if (!atomic_load_explicit(word, memory_order_relaxed))
|
||||
break;
|
||||
backoff = pthread_delay_np(word, backoff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -96,7 +95,14 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
|
|||
mutex->_pid = __pid;
|
||||
return 0;
|
||||
}
|
||||
backoff = pthread_delay_np(mutex, backoff);
|
||||
for (;;) {
|
||||
word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
|
||||
if (MUTEX_OWNER(word) == me)
|
||||
break;
|
||||
if (word == MUTEX_UNLOCK(word))
|
||||
break;
|
||||
backoff = pthread_delay_np(mutex, backoff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -121,7 +127,7 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
|
|||
if (_weaken(nsync_futex_wait_)) {
|
||||
pthread_mutex_lock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
|
||||
} else {
|
||||
pthread_mutex_lock_naive(mutex, word);
|
||||
pthread_mutex_lock_spin(&mutex->_futex);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -27,14 +27,8 @@
|
|||
#include "third_party/nsync/futex.internal.h"
|
||||
#include "third_party/nsync/mu.h"
|
||||
|
||||
static errno_t pthread_mutex_trylock_naive(pthread_mutex_t *mutex,
|
||||
uint64_t word) {
|
||||
uint64_t lock;
|
||||
word = MUTEX_UNLOCK(word);
|
||||
lock = MUTEX_LOCK(word);
|
||||
if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
|
||||
memory_order_acquire,
|
||||
memory_order_relaxed))
|
||||
static errno_t pthread_mutex_trylock_spin(atomic_int *word) {
|
||||
if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
|
||||
return 0;
|
||||
return EBUSY;
|
||||
}
|
||||
|
@ -116,7 +110,7 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
|
|||
if (_weaken(nsync_futex_wait_)) {
|
||||
return pthread_mutex_trylock_drepper(&mutex->_futex);
|
||||
} else {
|
||||
return pthread_mutex_trylock_naive(mutex, word);
|
||||
return pthread_mutex_trylock_spin(&mutex->_futex);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -28,9 +28,8 @@
|
|||
#include "third_party/nsync/futex.internal.h"
|
||||
#include "third_party/nsync/mu.h"
|
||||
|
||||
static void pthread_mutex_unlock_naive(pthread_mutex_t *mutex, uint64_t word) {
|
||||
uint64_t lock = MUTEX_UNLOCK(word);
|
||||
atomic_store_explicit(&mutex->_word, lock, memory_order_release);
|
||||
static void pthread_mutex_unlock_spin(atomic_int *word) {
|
||||
atomic_store_explicit(word, 0, memory_order_release);
|
||||
}
|
||||
|
||||
// see "take 3" algorithm in "futexes are tricky" by ulrich drepper
|
||||
|
@ -102,7 +101,7 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
|
|||
if (_weaken(nsync_futex_wake_)) {
|
||||
pthread_mutex_unlock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
|
||||
} else {
|
||||
pthread_mutex_unlock_naive(mutex, word);
|
||||
pthread_mutex_unlock_spin(&mutex->_futex);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -38,8 +38,12 @@
|
|||
* @see pthread_spin_init
|
||||
*/
|
||||
errno_t pthread_spin_lock(pthread_spinlock_t *spin) {
|
||||
while (atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire)) {
|
||||
pthread_pause_np();
|
||||
for (;;) {
|
||||
if (!atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire))
|
||||
break;
|
||||
for (;;)
|
||||
if (!atomic_load_explicit(&spin->_lock, memory_order_relaxed))
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue