Make spin locks go faster

This commit is contained in:
Justine Tunney 2024-07-25 17:14:30 -07:00
parent a31d5ea399
commit c8e25d811c
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
16 changed files with 150 additions and 123 deletions

View file

@ -30,15 +30,27 @@
static int cpus; static int cpus;
static double load; static double load;
static pthread_spinlock_t lock;
static struct NtFileTime idle1, kern1, user1; static struct NtFileTime idle1, kern1, user1;
static pthread_mutex_t getloadavg_lock;
// Acquires the mutex guarding getloadavg()'s shared sampling state.
// Also registered as the pthread_atfork() prepare handler below.
static void __getloadavg_lock(void) {
pthread_mutex_lock(&getloadavg_lock);
}
// Releases the getloadavg() mutex; also the atfork parent handler.
static void __getloadavg_unlock(void) {
pthread_mutex_unlock(&getloadavg_lock);
}
// Reinitializes the getloadavg() mutex. Used as the atfork child
// handler (the child must not inherit a lock held by another thread)
// and called once at startup to establish the initial state.
static void __getloadavg_wipe(void) {
pthread_mutex_init(&getloadavg_lock, 0);
}
textwindows int sys_getloadavg_nt(double *a, int n) { textwindows int sys_getloadavg_nt(double *a, int n) {
int i, rc; int i, rc;
uint64_t elapsed, used; uint64_t elapsed, used;
struct NtFileTime idle, kern, user; struct NtFileTime idle, kern, user;
BLOCK_SIGNALS; BLOCK_SIGNALS;
pthread_spin_lock(&lock); __getloadavg_lock();
if (GetSystemTimes(&idle, &kern, &user)) { if (GetSystemTimes(&idle, &kern, &user)) {
elapsed = (FT(kern) - FT(kern1)) + (FT(user) - FT(user1)); elapsed = (FT(kern) - FT(kern1)) + (FT(user) - FT(user1));
if (elapsed) { if (elapsed) {
@ -54,7 +66,7 @@ textwindows int sys_getloadavg_nt(double *a, int n) {
} else { } else {
rc = __winerr(); rc = __winerr();
} }
pthread_spin_unlock(&lock); __getloadavg_unlock();
ALLOW_SIGNALS; ALLOW_SIGNALS;
return rc; return rc;
} }
@ -65,5 +77,7 @@ __attribute__((__constructor__(40))) static textstartup void ntinitload(void) {
cpus = __get_cpu_count() / 2; cpus = __get_cpu_count() / 2;
cpus = MAX(1, cpus); cpus = MAX(1, cpus);
GetSystemTimes(&idle1, &kern1, &user1); GetSystemTimes(&idle1, &kern1, &user1);
pthread_atfork(__getloadavg_lock, __getloadavg_unlock, __getloadavg_wipe);
__getloadavg_wipe();
} }
} }

View file

@ -51,6 +51,7 @@
#include "libc/sysv/consts/sicode.h" #include "libc/sysv/consts/sicode.h"
#include "libc/sysv/consts/ss.h" #include "libc/sysv/consts/ss.h"
#include "libc/thread/posixthread.internal.h" #include "libc/thread/posixthread.internal.h"
#include "libc/thread/thread.h"
#ifdef __x86_64__ #ifdef __x86_64__
/** /**
@ -64,6 +65,20 @@ struct SignalFrame {
ucontext_t ctx; ucontext_t ctx;
}; };
static pthread_mutex_t __sig_lock_obj;
// Reinitializes the signal-delivery mutex. Registered as the atfork
// child handler and invoked once from __sig_init().
static void __sig_wipe(void) {
pthread_mutex_init(&__sig_lock_obj, 0);
}
// Acquires the mutex serializing thread suspension/context capture in
// __sig_killer(); also the atfork prepare handler.
static void __sig_lock(void) {
pthread_mutex_lock(&__sig_lock_obj);
}
// Releases the signal-delivery mutex; also the atfork parent handler.
static void __sig_unlock(void) {
pthread_mutex_unlock(&__sig_lock_obj);
}
static textwindows bool __sig_ignored_by_default(int sig) { static textwindows bool __sig_ignored_by_default(int sig) {
return sig == SIGURG || // return sig == SIGURG || //
sig == SIGCONT || // sig == SIGCONT || //
@ -318,11 +333,10 @@ static textwindows int __sig_killer(struct PosixThread *pt, int sig, int sic) {
// take control of thread // take control of thread
// suspending the thread happens asynchronously // suspending the thread happens asynchronously
// however getting the context blocks until it's frozen // however getting the context blocks until it's frozen
static pthread_spinlock_t killer_lock; __sig_lock();
pthread_spin_lock(&killer_lock);
if (SuspendThread(th) == -1u) { if (SuspendThread(th) == -1u) {
STRACE("SuspendThread failed w/ %d", GetLastError()); STRACE("SuspendThread failed w/ %d", GetLastError());
pthread_spin_unlock(&killer_lock); __sig_unlock();
return ESRCH; return ESRCH;
} }
struct NtContext nc; struct NtContext nc;
@ -330,10 +344,10 @@ static textwindows int __sig_killer(struct PosixThread *pt, int sig, int sic) {
if (!GetThreadContext(th, &nc)) { if (!GetThreadContext(th, &nc)) {
STRACE("GetThreadContext failed w/ %d", GetLastError()); STRACE("GetThreadContext failed w/ %d", GetLastError());
ResumeThread(th); ResumeThread(th);
pthread_spin_unlock(&killer_lock); __sig_unlock();
return ESRCH; return ESRCH;
} }
pthread_spin_unlock(&killer_lock); __sig_unlock();
// we can't preempt threads that masked sig or are blocked // we can't preempt threads that masked sig or are blocked
// we can't preempt threads that are running in win32 code // we can't preempt threads that are running in win32 code
@ -612,6 +626,8 @@ __attribute__((__constructor__(10))) textstartup void __sig_init(void) {
return; return;
AddVectoredExceptionHandler(true, (void *)__sig_crash); AddVectoredExceptionHandler(true, (void *)__sig_crash);
SetConsoleCtrlHandler((void *)__sig_console, true); SetConsoleCtrlHandler((void *)__sig_console, true);
pthread_atfork(__sig_lock, __sig_unlock, __sig_wipe);
__sig_wipe();
} }
#endif /* __x86_64__ */ #endif /* __x86_64__ */

View file

@ -35,4 +35,5 @@ void __cxa_unlock(void) {
// Module constructor (priority 60): registers the __cxa lock's fork
// handlers and initializes the lock via __cxa_wipe().
// NOTE(review): reconstructed from a two-column diff rendering that
// conflated old and new revision text on each line.
__attribute__((__constructor__(60))) static textstartup void __cxa_init() {
  pthread_atfork(__cxa_lock, __cxa_unlock, __cxa_wipe);
  __cxa_wipe();
}

View file

@ -86,27 +86,15 @@ void __maps_init(void) {
privileged bool __maps_lock(void) { privileged bool __maps_lock(void) {
struct CosmoTib *tib; struct CosmoTib *tib;
if (!__tls_enabled) if (__tls_enabled)
return false; return false;
tib = __get_tls_privileged(); tib = __get_tls_privileged();
if (atomic_fetch_add_explicit(&tib->tib_relock_maps, 1, memory_order_relaxed)) if (atomic_fetch_add_explicit(&tib->tib_relock_maps, 1, memory_order_relaxed))
return true; return true;
int backoff = 0; while (atomic_exchange_explicit(&__maps.lock, 1, memory_order_acquire))
while (atomic_exchange_explicit(&__maps.lock, 1, memory_order_acquire)) { for (;;)
if (backoff < 7) { if (!atomic_load_explicit(&__maps.lock, memory_order_relaxed))
volatile int i; break;
for (i = 0; i != 1 << backoff; i++) {
}
backoff++;
} else {
// STRACE("pthread_delay_np(__maps)");
#if defined(__GNUC__) && defined(__aarch64__)
__asm__ volatile("yield");
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
__asm__ volatile("pause");
#endif
}
}
return false; return false;
} }

View file

@ -16,21 +16,11 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/atomic.h"
#include "libc/calls/state.internal.h"
#include "libc/cosmo.h"
#include "libc/dce.h"
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/intrin/atomic.h"
#include "libc/intrin/dll.h"
#include "libc/intrin/strace.h" #include "libc/intrin/strace.h"
#include "libc/macros.internal.h" #include "libc/macros.internal.h"
#include "libc/proc/proc.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/thread/posixthread.internal.h" #include "libc/thread/posixthread.internal.h"
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
#include "libc/thread/tls.h"
struct AtFork { struct AtFork {
struct AtFork *p[2]; struct AtFork *p[2];
@ -38,16 +28,16 @@ struct AtFork {
}; };
static struct AtForks { static struct AtForks {
pthread_spinlock_t lock; pthread_mutex_t lock;
struct AtFork *list; struct AtFork *list;
struct AtFork pool[64]; struct AtFork pool[256];
atomic_int allocated; int allocated;
} _atforks; } _atforks = {
PTHREAD_MUTEX_INITIALIZER,
};
static void _pthread_onfork(int i, const char *op) { static void _pthread_onfork(int i, const char *op) {
struct AtFork *a; struct AtFork *a;
if (!i)
pthread_spin_lock(&_atforks.lock);
for (a = _atforks.list; a; a = a->p[!i]) { for (a = _atforks.list; a; a = a->p[!i]) {
if (a->f[i]) { if (a->f[i]) {
STRACE("pthread_atfork(%s, %t)", op, a->f[i]); STRACE("pthread_atfork(%s, %t)", op, a->f[i]);
@ -55,47 +45,41 @@ static void _pthread_onfork(int i, const char *op) {
} }
_atforks.list = a; _atforks.list = a;
} }
if (i)
pthread_spin_unlock(&_atforks.lock);
} }
void _pthread_onfork_prepare(void) { void _pthread_onfork_prepare(void) {
pthread_mutex_lock(&_atforks.lock);
_pthread_onfork(0, "prepare"); _pthread_onfork(0, "prepare");
} }
void _pthread_onfork_parent(void) { void _pthread_onfork_parent(void) {
_pthread_onfork(1, "parent"); _pthread_onfork(1, "parent");
pthread_mutex_unlock(&_atforks.lock);
} }
void _pthread_onfork_child(void) { void _pthread_onfork_child(void) {
pthread_mutex_init(&_atforks.lock, 0);
_pthread_onfork(2, "child"); _pthread_onfork(2, "child");
} }
static struct AtFork *_pthread_atfork_alloc(void) {
int i, n = ARRAYLEN(_atforks.pool);
if (atomic_load_explicit(&_atforks.allocated, memory_order_relaxed) < n &&
(i = atomic_fetch_add(&_atforks.allocated, 1)) < n) {
return _atforks.pool + i;
} else {
return 0;
}
}
int _pthread_atfork(atfork_f prepare, atfork_f parent, atfork_f child) { int _pthread_atfork(atfork_f prepare, atfork_f parent, atfork_f child) {
int rc; int rc;
struct AtFork *a; struct AtFork *a;
if (!(a = _pthread_atfork_alloc())) pthread_mutex_lock(&_atforks.lock);
return ENOMEM; if (_atforks.allocated < ARRAYLEN(_atforks.pool)) {
a->f[0] = prepare; a = &_atforks.pool[_atforks.allocated++];
a->f[1] = parent; a->f[0] = prepare;
a->f[2] = child; a->f[1] = parent;
pthread_spin_lock(&_atforks.lock); a->f[2] = child;
a->p[0] = 0; a->p[0] = 0;
a->p[1] = _atforks.list; a->p[1] = _atforks.list;
if (_atforks.list) if (_atforks.list)
_atforks.list->p[0] = a; _atforks.list->p[0] = a;
_atforks.list = a; _atforks.list = a;
pthread_spin_unlock(&_atforks.lock); rc = 0;
rc = 0; } else {
rc = ENOMEM;
}
pthread_mutex_unlock(&_atforks.lock);
return rc; return rc;
} }

View file

@ -31,17 +31,16 @@
#include "third_party/nsync/futex.internal.h" #include "third_party/nsync/futex.internal.h"
#include "third_party/nsync/mu.h" #include "third_party/nsync/mu.h"
// Acquires a plain (non-futex) mutex by spinning on its word.
// Selected by pthread_mutex_lock_impl() when nsync_futex_wait_ isn't
// linked. Test-and-test-and-set: after a failed acquiring exchange we
// spin on relaxed loads — which keep the cache line in shared state —
// and only retry the exchange once the word reads free.
// pthread_delay_np() supplies progressive backoff while waiting.
static void pthread_mutex_lock_spin(atomic_int *word) {
  int backoff = 0;
  for (;;) {
    // attempt acquisition with an acquire-ordered exchange
    if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
      break;
    // wait read-only until the holder releases
    for (;;) {
      if (!atomic_load_explicit(word, memory_order_relaxed))
        break;
      backoff = pthread_delay_np(word, backoff);
    }
  }
}
@ -96,7 +95,12 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
mutex->_pid = __pid; mutex->_pid = __pid;
return 0; return 0;
} }
backoff = pthread_delay_np(mutex, backoff); for (;;) {
word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
if (!MUTEX_LOCKED(word))
break;
backoff = pthread_delay_np(mutex, backoff);
}
} }
} }
@ -121,7 +125,7 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
if (_weaken(nsync_futex_wait_)) { if (_weaken(nsync_futex_wait_)) {
pthread_mutex_lock_drepper(&mutex->_futex, MUTEX_PSHARED(word)); pthread_mutex_lock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
} else { } else {
pthread_mutex_lock_naive(mutex, word); pthread_mutex_lock_spin(&mutex->_futex);
} }
return 0; return 0;
} }

View file

@ -27,14 +27,8 @@
#include "third_party/nsync/futex.internal.h" #include "third_party/nsync/futex.internal.h"
#include "third_party/nsync/mu.h" #include "third_party/nsync/mu.h"
// Attempts one non-blocking acquisition of the spin-style mutex word.
// Selected by pthread_mutex_trylock() when nsync futexes aren't linked.
//
// @return 0 on success, or EBUSY if the lock is already held
static errno_t pthread_mutex_trylock_spin(atomic_int *word) {
  if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
    return 0;
  return EBUSY;
}
@ -116,7 +110,7 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
if (_weaken(nsync_futex_wait_)) { if (_weaken(nsync_futex_wait_)) {
return pthread_mutex_trylock_drepper(&mutex->_futex); return pthread_mutex_trylock_drepper(&mutex->_futex);
} else { } else {
return pthread_mutex_trylock_naive(mutex, word); return pthread_mutex_trylock_spin(&mutex->_futex);
} }
} }

View file

@ -28,9 +28,8 @@
#include "third_party/nsync/futex.internal.h" #include "third_party/nsync/futex.internal.h"
#include "third_party/nsync/mu.h" #include "third_party/nsync/mu.h"
// Releases the spin-style mutex word. The release ordering makes the
// critical section's writes visible to the next acquirer's
// acquire-ordered exchange in pthread_mutex_lock_spin().
static void pthread_mutex_unlock_spin(atomic_int *word) {
  atomic_store_explicit(word, 0, memory_order_release);
}
// see "take 3" algorithm in "futexes are tricky" by ulrich drepper // see "take 3" algorithm in "futexes are tricky" by ulrich drepper
@ -102,7 +101,7 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
if (_weaken(nsync_futex_wake_)) { if (_weaken(nsync_futex_wake_)) {
pthread_mutex_unlock_drepper(&mutex->_futex, MUTEX_PSHARED(word)); pthread_mutex_unlock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
} else { } else {
pthread_mutex_unlock_naive(mutex, word); pthread_mutex_unlock_spin(&mutex->_futex);
} }
return 0; return 0;
} }

View file

@ -21,30 +21,46 @@
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
static void (*funcs[32])(void);
static int count; static int count;
static pthread_spinlock_t lock; static void (*funcs[32])(void);
pthread_spinlock_t *const __at_quick_exit_lockptr = &lock; static pthread_mutex_t __quick_exit_lock_obj;
// Reinitializes the at_quick_exit() mutex. Registered as the atfork
// child handler and invoked once from __quick_exit_init().
static void __quick_exit_wipe(void) {
pthread_mutex_init(&__quick_exit_lock_obj, 0);
}
// Acquires the mutex guarding the at_quick_exit() callback table;
// also the atfork prepare handler.
static void __quick_exit_lock(void) {
pthread_mutex_lock(&__quick_exit_lock_obj);
}
// Releases the at_quick_exit() mutex; also the atfork parent handler.
static void __quick_exit_unlock(void) {
pthread_mutex_unlock(&__quick_exit_lock_obj);
}
// Runs the callbacks registered by at_quick_exit() in LIFO order.
// The table lock is dropped around each callback so a handler may
// itself call at_quick_exit() without deadlocking.
// NOTE(review): the lock remains held when the loop exits — presumably
// acceptable because this runs during quick_exit(); confirm upstream.
void __funcs_on_quick_exit(void) {
  void (*func)(void);
  __quick_exit_lock();
  while (count) {
    func = funcs[--count];
    __quick_exit_unlock();
    func();
    __quick_exit_lock();
  }
}
// Registers func to be invoked by quick_exit(), in reverse order of
// registration. The table is a fixed 32-slot array (see funcs above).
//
// @return 0 on success, or -1 if the table is full
int at_quick_exit(void func(void)) {
  int res = 0;
  __quick_exit_lock();
  if (count == ARRAYLEN(funcs)) {
    res = -1;  // no capacity left
  } else {
    funcs[count++] = func;
  }
  __quick_exit_unlock();
  return res;
}
// Module constructor (priority 10): wires the quick-exit table lock
// into fork handling and initializes it.
__attribute__((__constructor__(10))) textstartup void __quick_exit_init(void) {
pthread_atfork(__quick_exit_lock, __quick_exit_unlock, __quick_exit_wipe);
__quick_exit_wipe();
}

View file

@ -17,6 +17,8 @@
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/assert.h" #include "libc/assert.h"
#include "libc/atomic.h"
#include "libc/cosmo.h"
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/intrin/promises.h" #include "libc/intrin/promises.h"
#include "libc/intrin/strace.h" #include "libc/intrin/strace.h"
@ -27,14 +29,12 @@
#include "libc/runtime/symbols.internal.h" #include "libc/runtime/symbols.internal.h"
#include "libc/runtime/zipos.internal.h" #include "libc/runtime/zipos.internal.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/thread/thread.h"
#include "libc/x/x.h" #include "libc/x/x.h"
#include "libc/zip.internal.h" #include "libc/zip.internal.h"
#include "third_party/puff/puff.h" #include "third_party/puff/puff.h"
__static_yoink("__get_symbol"); __static_yoink("__get_symbol");
static pthread_spinlock_t g_lock;
struct SymbolTable *__symtab; // for kprintf struct SymbolTable *__symtab; // for kprintf
static ssize_t GetZipFile(struct Zipos *zipos, const char *name) { static ssize_t GetZipFile(struct Zipos *zipos, const char *name) {
@ -100,6 +100,25 @@ static struct SymbolTable *GetSymbolTableFromElf(void) {
} }
} }
// One-shot initializer for the __symtab singleton (run via cosmo_once
// from GetSymbolTable). Tries the embedded zip store first and fixes up
// the table's internal offset-based pointers; falls back to reading the
// ELF image. Preserves errno across the probing.
static void GetSymbolTableInit(void) {
struct Zipos *z;
// save errno: the loaders below may clobber it on failed probes
int e = errno;
if (!__symtab && !__isworker) {
if (_weaken(__zipos_get) && (z = _weaken(__zipos_get)())) {
if ((__symtab = GetSymbolTableFromZip(z))) {
// convert stored offsets into absolute pointers
__symtab->names =
(uint32_t *)((char *)__symtab + __symtab->names_offset);
__symtab->name_base =
(char *)((char *)__symtab + __symtab->name_base_offset);
}
}
if (!__symtab) {
__symtab = GetSymbolTableFromElf();
}
}
errno = e;
}
/** /**
* Returns symbol table singleton. * Returns symbol table singleton.
* *
@ -121,24 +140,7 @@ static struct SymbolTable *GetSymbolTableFromElf(void) {
* @return symbol table, or NULL if not found * @return symbol table, or NULL if not found
*/ */
struct SymbolTable *GetSymbolTable(void) { struct SymbolTable *GetSymbolTable(void) {
struct Zipos *z; static atomic_uint once;
if (pthread_spin_trylock(&g_lock)) cosmo_once(&once, GetSymbolTableInit);
return 0;
int e = errno;
if (!__symtab && !__isworker) {
if (_weaken(__zipos_get) && (z = _weaken(__zipos_get)())) {
if ((__symtab = GetSymbolTableFromZip(z))) {
__symtab->names =
(uint32_t *)((char *)__symtab + __symtab->names_offset);
__symtab->name_base =
(char *)((char *)__symtab + __symtab->name_base_offset);
}
}
if (!__symtab) {
__symtab = GetSymbolTableFromElf();
}
}
errno = e;
pthread_spin_unlock(&g_lock);
return __symtab; return __symtab;
} }

View file

@ -55,9 +55,14 @@ static void __stdio_fork_parent(void) {
// Child-side fork handler for stdio: reinitializes every open stream's
// lock as a recursive mutex, plus the global fflush lock, since locks
// inherited across fork() may be in an unusable state.
// NOTE(review): reconstructed from a two-column diff rendering; the
// attr object is created/destroyed per iteration as upstream wrote it.
static void __stdio_fork_child(void) {
  FILE *f;
  for (int i = __fflush.handles.i; i--;) {
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
    if ((f = __fflush.handles.p[i]))
      pthread_mutex_init(&f->lock, &attr);
    pthread_mutexattr_destroy(&attr);
  }
  pthread_mutex_init(&__fflush_lock_obj, 0);
}

View file

@ -38,8 +38,12 @@
* @see pthread_spin_init * @see pthread_spin_init
*/ */
/**
 * Acquires spin lock.
 *
 * Uses test-and-test-and-set: after a failed acquire-ordered exchange,
 * wait on relaxed loads until the lock reads free, then retry the
 * exchange. Waiting with loads rather than repeated exchanges keeps
 * the contended cache line in shared state.
 *
 * @return 0 (always succeeds)
 * @see pthread_spin_init
 */
errno_t pthread_spin_lock(pthread_spinlock_t *spin) {
  for (;;) {
    if (!atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire))
      break;
    for (;;)
      if (!atomic_load_explicit(&spin->_lock, memory_order_relaxed))
        break;
  }
  return 0;
}