Make spin locks go faster

Justine Tunney 2024-07-25 17:14:30 -07:00
parent a31d5ea399
commit c8e25d811c
GPG key ID: BE714B4575D6E328
16 changed files with 150 additions and 123 deletions

View file

@@ -30,15 +30,27 @@
 static int cpus;
 static double load;
-static pthread_spinlock_t lock;
 static struct NtFileTime idle1, kern1, user1;
 
+static pthread_mutex_t getloadavg_lock;
+
+static void __getloadavg_lock(void) {
+  pthread_mutex_lock(&getloadavg_lock);
+}
+
+static void __getloadavg_unlock(void) {
+  pthread_mutex_unlock(&getloadavg_lock);
+}
+
+static void __getloadavg_wipe(void) {
+  pthread_mutex_init(&getloadavg_lock, 0);
+}
+
 textwindows int sys_getloadavg_nt(double *a, int n) {
   int i, rc;
   uint64_t elapsed, used;
   struct NtFileTime idle, kern, user;
   BLOCK_SIGNALS;
-  pthread_spin_lock(&lock);
+  __getloadavg_lock();
   if (GetSystemTimes(&idle, &kern, &user)) {
     elapsed = (FT(kern) - FT(kern1)) + (FT(user) - FT(user1));
     if (elapsed) {
@@ -54,7 +66,7 @@ textwindows int sys_getloadavg_nt(double *a, int n) {
   } else {
     rc = __winerr();
   }
-  pthread_spin_unlock(&lock);
+  __getloadavg_unlock();
   ALLOW_SIGNALS;
   return rc;
 }
@@ -65,5 +77,7 @@ __attribute__((__constructor__(40))) static textstartup void ntinitload(void) {
     cpus = __get_cpu_count() / 2;
     cpus = MAX(1, cpus);
     GetSystemTimes(&idle1, &kern1, &user1);
+    pthread_atfork(__getloadavg_lock, __getloadavg_unlock, __getloadavg_wipe);
+    __getloadavg_wipe();
   }
 }
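
The shape of this change repeats throughout the commit: a raw spin lock becomes a module-private mutex with lock/unlock/wipe helpers, registered via pthread_atfork() so fork() holds the lock across the clone and the child gets a fresh unlocked copy instead of one abandoned mid-acquire by a thread that no longer exists. A minimal standalone sketch of the pattern (hypothetical names, plain POSIX, not the libc code):

```c
// fork() runs "prepare" first (take the lock), the parent runs
// "parent" (release it), and the child runs "wipe" (reinitialize it,
// since the thread that held it does not exist in the child).
#include <pthread.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_value;

static void on_prepare(void) { pthread_mutex_lock(&g_lock); }
static void on_parent(void) { pthread_mutex_unlock(&g_lock); }
static void on_child(void) { pthread_mutex_init(&g_lock, 0); }

int main(void) {
  pthread_atfork(on_prepare, on_parent, on_child);
  if (!fork()) {
    pthread_mutex_lock(&g_lock);  // fresh lock: cannot be inherited held
    printf("child sees %d\n", g_value);
    pthread_mutex_unlock(&g_lock);
    _exit(0);
  }
  wait(0);
  return 0;
}
```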

View file

@@ -51,6 +51,7 @@
 #include "libc/sysv/consts/sicode.h"
 #include "libc/sysv/consts/ss.h"
 #include "libc/thread/posixthread.internal.h"
+#include "libc/thread/thread.h"
 
 #ifdef __x86_64__
 /**
@@ -64,6 +65,20 @@ struct SignalFrame {
   ucontext_t ctx;
 };
 
+static pthread_mutex_t __sig_lock_obj;
+
+static void __sig_wipe(void) {
+  pthread_mutex_init(&__sig_lock_obj, 0);
+}
+
+static void __sig_lock(void) {
+  pthread_mutex_lock(&__sig_lock_obj);
+}
+
+static void __sig_unlock(void) {
+  pthread_mutex_unlock(&__sig_lock_obj);
+}
+
 static textwindows bool __sig_ignored_by_default(int sig) {
   return sig == SIGURG ||   //
          sig == SIGCONT ||  //
@@ -318,11 +333,10 @@ static textwindows int __sig_killer(struct PosixThread *pt, int sig, int sic) {
   // take control of thread
   // suspending the thread happens asynchronously
   // however getting the context blocks until it's frozen
-  static pthread_spinlock_t killer_lock;
-  pthread_spin_lock(&killer_lock);
+  __sig_lock();
   if (SuspendThread(th) == -1u) {
     STRACE("SuspendThread failed w/ %d", GetLastError());
-    pthread_spin_unlock(&killer_lock);
+    __sig_unlock();
     return ESRCH;
   }
   struct NtContext nc;
@@ -330,10 +344,10 @@ static textwindows int __sig_killer(struct PosixThread *pt, int sig, int sic) {
   if (!GetThreadContext(th, &nc)) {
     STRACE("GetThreadContext failed w/ %d", GetLastError());
     ResumeThread(th);
-    pthread_spin_unlock(&killer_lock);
+    __sig_unlock();
     return ESRCH;
   }
-  pthread_spin_unlock(&killer_lock);
+  __sig_unlock();
 
   // we can't preempt threads that masked sig or are blocked
   // we can't preempt threads that are running in win32 code
@@ -612,6 +626,8 @@ __attribute__((__constructor__(10))) textstartup void __sig_init(void) {
     return;
   AddVectoredExceptionHandler(true, (void *)__sig_crash);
   SetConsoleCtrlHandler((void *)__sig_console, true);
+  pthread_atfork(__sig_lock, __sig_unlock, __sig_wipe);
+  __sig_wipe();
 }
 
 #endif /* __x86_64__ */

View file

@@ -35,4 +35,5 @@ void __cxa_unlock(void) {
 
 __attribute__((__constructor__(60))) static textstartup void __cxa_init() {
   pthread_atfork(__cxa_lock, __cxa_unlock, __cxa_wipe);
+  __cxa_wipe();
 }

View file

@@ -86,27 +86,15 @@ void __maps_init(void) {
 
 privileged bool __maps_lock(void) {
   struct CosmoTib *tib;
-  if (!__tls_enabled)
+  if (__tls_enabled)
     return false;
   tib = __get_tls_privileged();
   if (atomic_fetch_add_explicit(&tib->tib_relock_maps, 1, memory_order_relaxed))
     return true;
-  int backoff = 0;
-  while (atomic_exchange_explicit(&__maps.lock, 1, memory_order_acquire)) {
-    if (backoff < 7) {
-      volatile int i;
-      for (i = 0; i != 1 << backoff; i++) {
-      }
-      backoff++;
-    } else {
-      // STRACE("pthread_delay_np(__maps)");
-#if defined(__GNUC__) && defined(__aarch64__)
-      __asm__ volatile("yield");
-#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
-      __asm__ volatile("pause");
-#endif
-    }
-  }
+  while (atomic_exchange_explicit(&__maps.lock, 1, memory_order_acquire))
+    for (;;)
+      if (!atomic_load_explicit(&__maps.lock, memory_order_relaxed))
+        break;
   return false;
 }
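
This is the heart of the commit: the old loop issued an atomic exchange on every iteration, and each exchange is a write that yanks the cache line away from every other waiter. The new loop only retries the exchange after a plain relaxed load has seen the lock become free. An illustrative comparison of the two strategies (generic C11, not the libc code):

```c
#include <stdatomic.h>

// Naive test-and-set: every iteration performs a write, so waiting
// cores bounce the cache line between each other in modified state.
void tas_lock(atomic_int *lock) {
  while (atomic_exchange_explicit(lock, 1, memory_order_acquire)) {
  }
}

// Test-and-test-and-set: spin read-only until the lock looks free,
// then retry the exchange. Waiters share the line in read state and
// only generate write traffic at the moment of release.
void ttas_lock(atomic_int *lock) {
  while (atomic_exchange_explicit(lock, 1, memory_order_acquire))
    while (atomic_load_explicit(lock, memory_order_relaxed)) {
    }
}

void spin_unlock(atomic_int *lock) {
  atomic_store_explicit(lock, 0, memory_order_release);
}
```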

View file

@@ -16,21 +16,11 @@
   TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
   PERFORMANCE OF THIS SOFTWARE.
 */
-#include "libc/atomic.h"
-#include "libc/calls/state.internal.h"
-#include "libc/cosmo.h"
-#include "libc/dce.h"
 #include "libc/errno.h"
-#include "libc/intrin/atomic.h"
-#include "libc/intrin/dll.h"
 #include "libc/intrin/strace.h"
 #include "libc/macros.internal.h"
 #include "libc/proc/proc.internal.h"
-#include "libc/runtime/runtime.h"
-#include "libc/str/str.h"
 #include "libc/thread/posixthread.internal.h"
 #include "libc/thread/thread.h"
-#include "libc/thread/tls.h"
 
 struct AtFork {
   struct AtFork *p[2];
@@ -38,16 +28,16 @@ struct AtFork {
 };
 
 static struct AtForks {
-  pthread_spinlock_t lock;
+  pthread_mutex_t lock;
   struct AtFork *list;
-  struct AtFork pool[64];
-  atomic_int allocated;
-} _atforks;
+  struct AtFork pool[256];
+  int allocated;
+} _atforks = {
+    PTHREAD_MUTEX_INITIALIZER,
+};
 
 static void _pthread_onfork(int i, const char *op) {
   struct AtFork *a;
-  if (!i)
-    pthread_spin_lock(&_atforks.lock);
   for (a = _atforks.list; a; a = a->p[!i]) {
     if (a->f[i]) {
       STRACE("pthread_atfork(%s, %t)", op, a->f[i]);
@@ -55,47 +45,41 @@ static void _pthread_onfork(int i, const char *op) {
     }
     _atforks.list = a;
   }
-  if (i)
-    pthread_spin_unlock(&_atforks.lock);
 }
 
 void _pthread_onfork_prepare(void) {
+  pthread_mutex_lock(&_atforks.lock);
   _pthread_onfork(0, "prepare");
 }
 
 void _pthread_onfork_parent(void) {
   _pthread_onfork(1, "parent");
+  pthread_mutex_unlock(&_atforks.lock);
 }
 
 void _pthread_onfork_child(void) {
+  pthread_mutex_init(&_atforks.lock, 0);
   _pthread_onfork(2, "child");
 }
 
-static struct AtFork *_pthread_atfork_alloc(void) {
-  int i, n = ARRAYLEN(_atforks.pool);
-  if (atomic_load_explicit(&_atforks.allocated, memory_order_relaxed) < n &&
-      (i = atomic_fetch_add(&_atforks.allocated, 1)) < n) {
-    return _atforks.pool + i;
-  } else {
-    return 0;
-  }
-}
-
 int _pthread_atfork(atfork_f prepare, atfork_f parent, atfork_f child) {
   int rc;
   struct AtFork *a;
-  if (!(a = _pthread_atfork_alloc()))
-    return ENOMEM;
+  pthread_mutex_lock(&_atforks.lock);
+  if (_atforks.allocated < ARRAYLEN(_atforks.pool)) {
+    a = &_atforks.pool[_atforks.allocated++];
     a->f[0] = prepare;
     a->f[1] = parent;
     a->f[2] = child;
-  pthread_spin_lock(&_atforks.lock);
     a->p[0] = 0;
     a->p[1] = _atforks.list;
     if (_atforks.list)
       _atforks.list->p[0] = a;
     _atforks.list = a;
-  pthread_spin_unlock(&_atforks.lock);
     rc = 0;
+  } else {
+    rc = ENOMEM;
+  }
+  pthread_mutex_unlock(&_atforks.lock);
   return rc;
 }
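
The registration list and the pool allocator now live under the same mutex that _pthread_onfork_prepare() holds across fork(), closing the race between registering a handler and forking. The a->p[!i] walk preserves POSIX's required call order, shown with a plain-POSIX demo (hypothetical handlers):

```c
// POSIX requires prepare handlers to run in reverse order of
// registration, and parent/child handlers in registration order.
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static void prep1(void) { puts("prepare 1"); }
static void parent1(void) { puts("parent 1"); }
static void child1(void) { puts("child 1"); }
static void prep2(void) { puts("prepare 2"); }
static void parent2(void) { puts("parent 2"); }
static void child2(void) { puts("child 2"); }

int main(void) {
  pthread_atfork(prep1, parent1, child1);
  pthread_atfork(prep2, parent2, child2);
  if (!fork())  // prints: prepare 2, prepare 1, then child 1, child 2
    _exit(0);   // (the parent prints parent 1, parent 2)
  return 0;
}
```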

View file

@@ -31,17 +31,16 @@
 #include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"
 
-static void pthread_mutex_lock_naive(pthread_mutex_t *mutex, uint64_t word) {
+static void pthread_mutex_lock_spin(atomic_int *word) {
   int backoff = 0;
-  uint64_t lock;
   for (;;) {
-    word = MUTEX_UNLOCK(word);
-    lock = MUTEX_LOCK(word);
-    if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
-                                              memory_order_acquire,
-                                              memory_order_relaxed))
-      return;
-    backoff = pthread_delay_np(mutex, backoff);
+    if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
+      break;
+    for (;;) {
+      if (!atomic_load_explicit(word, memory_order_relaxed))
+        break;
+      backoff = pthread_delay_np(word, backoff);
+    }
   }
 }
@@ -96,8 +95,13 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
       mutex->_pid = __pid;
       return 0;
     }
+    for (;;) {
+      word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
+      if (!MUTEX_LOCKED(word))
+        break;
       backoff = pthread_delay_np(mutex, backoff);
+    }
   }
 }
 
 static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
@@ -121,7 +125,7 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
   if (_weaken(nsync_futex_wait_)) {
     pthread_mutex_lock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
   } else {
-    pthread_mutex_lock_naive(mutex, word);
+    pthread_mutex_lock_spin(&mutex->_futex);
   }
   return 0;
 }
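
Here the no-futex fallback becomes the same test-and-test-and-set shape, with pthread_delay_np() adding backoff inside the read-only wait. Its exact behavior isn't shown in this diff, but the backoff loop deleted from __maps_lock() above suggests the general idea; a sketch reconstructed from that deleted code (an assumption about its behavior, not pthread_delay_np's actual source):

```c
// Spin for roughly 2^backoff iterations, doubling the wait each call
// until a cap, after which the CPU's spin-wait hint is issued instead.
static int delay_sketch(int backoff) {
  if (backoff < 7) {
    volatile int i;
    for (i = 0; i != 1 << backoff; i++) {  // burn ~2^backoff iterations
    }
    backoff++;  // wait twice as long next time
  } else {
#if defined(__GNUC__) && defined(__aarch64__)
    __asm__ volatile("yield");  // hint to SMT sibling threads
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
    __asm__ volatile("pause");  // spin-wait hint: saves power, eases contention
#endif
  }
  return backoff;
}
```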

View file

@@ -27,14 +27,8 @@
 #include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"
 
-static errno_t pthread_mutex_trylock_naive(pthread_mutex_t *mutex,
-                                           uint64_t word) {
-  uint64_t lock;
-  word = MUTEX_UNLOCK(word);
-  lock = MUTEX_LOCK(word);
-  if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
-                                            memory_order_acquire,
-                                            memory_order_relaxed))
+static errno_t pthread_mutex_trylock_spin(atomic_int *word) {
+  if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
     return 0;
   return EBUSY;
 }
@@ -116,7 +110,7 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
   if (_weaken(nsync_futex_wait_)) {
     return pthread_mutex_trylock_drepper(&mutex->_futex);
   } else {
-    return pthread_mutex_trylock_naive(mutex, word);
+    return pthread_mutex_trylock_spin(&mutex->_futex);
  }
 }

View file

@@ -28,9 +28,8 @@
 #include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"
 
-static void pthread_mutex_unlock_naive(pthread_mutex_t *mutex, uint64_t word) {
-  uint64_t lock = MUTEX_UNLOCK(word);
-  atomic_store_explicit(&mutex->_word, lock, memory_order_release);
+static void pthread_mutex_unlock_spin(atomic_int *word) {
+  atomic_store_explicit(word, 0, memory_order_release);
 }
 
 // see "take 3" algorithm in "futexes are tricky" by ulrich drepper
@@ -102,7 +101,7 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
   if (_weaken(nsync_futex_wake_)) {
     pthread_mutex_unlock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
   } else {
-    pthread_mutex_unlock_naive(mutex, word);
+    pthread_mutex_unlock_spin(&mutex->_futex);
   }
   return 0;
 }
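
Unlock stays a single release store: it pairs with the acquire exchange in the lock path, which is exactly what makes writes from inside the critical section visible to the next owner. A self-contained illustration of that pairing (generic C11, hypothetical names):

```c
#include <stdatomic.h>

static atomic_int word;  // 0 = free, 1 = held
static int protected_data;

void acquire(void) {
  // acquire: no later memory operation may be reordered before a
  // successful exchange that observed 0
  while (atomic_exchange_explicit(&word, 1, memory_order_acquire)) {
  }
}

void release(void) {
  // release: no earlier memory operation may be reordered after this
  // store, so protected_data's new value is published to the next
  // thread whose acquire exchange sees 0
  atomic_store_explicit(&word, 0, memory_order_release);
}

void increment(void) {
  acquire();
  protected_data++;
  release();
}
```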

View file

@@ -21,30 +21,46 @@
 #include "libc/runtime/runtime.h"
 #include "libc/thread/thread.h"
 
-static void (*funcs[32])(void);
 static int count;
-static pthread_spinlock_t lock;
-pthread_spinlock_t *const __at_quick_exit_lockptr = &lock;
+static void (*funcs[32])(void);
+static pthread_mutex_t __quick_exit_lock_obj;
+
+static void __quick_exit_wipe(void) {
+  pthread_mutex_init(&__quick_exit_lock_obj, 0);
+}
+
+static void __quick_exit_lock(void) {
+  pthread_mutex_lock(&__quick_exit_lock_obj);
+}
+
+static void __quick_exit_unlock(void) {
+  pthread_mutex_unlock(&__quick_exit_lock_obj);
+}
 
 void __funcs_on_quick_exit(void) {
   void (*func)(void);
-  pthread_spin_lock(&lock);
+  __quick_exit_lock();
   while (count) {
     func = funcs[--count];
-    pthread_spin_unlock(&lock);
+    __quick_exit_unlock();
     func();
-    pthread_spin_lock(&lock);
+    __quick_exit_lock();
   }
 }
 
 int at_quick_exit(void func(void)) {
   int res = 0;
-  pthread_spin_lock(&lock);
+  __quick_exit_lock();
   if (count == ARRAYLEN(funcs)) {
     res = -1;
   } else {
     funcs[count++] = func;
   }
-  pthread_spin_unlock(&lock);
+  __quick_exit_unlock();
   return res;
 }
+
+__attribute__((__constructor__(10))) textstartup void __quick_exit_init(void) {
+  pthread_atfork(__quick_exit_lock, __quick_exit_unlock, __quick_exit_wipe);
+  __quick_exit_wipe();
+}

View file

@@ -17,6 +17,8 @@
   PERFORMANCE OF THIS SOFTWARE.
 */
 #include "libc/assert.h"
+#include "libc/atomic.h"
+#include "libc/cosmo.h"
 #include "libc/errno.h"
 #include "libc/intrin/promises.h"
 #include "libc/intrin/strace.h"
@@ -27,14 +29,12 @@
 #include "libc/runtime/symbols.internal.h"
 #include "libc/runtime/zipos.internal.h"
 #include "libc/str/str.h"
-#include "libc/thread/thread.h"
 #include "libc/x/x.h"
 #include "libc/zip.internal.h"
 #include "third_party/puff/puff.h"
 
 __static_yoink("__get_symbol");
 
-static pthread_spinlock_t g_lock;
 struct SymbolTable *__symtab;  // for kprintf
 
 static ssize_t GetZipFile(struct Zipos *zipos, const char *name) {
@@ -100,6 +100,25 @@ static struct SymbolTable *GetSymbolTableFromElf(void) {
   }
 }
 
+static void GetSymbolTableInit(void) {
+  struct Zipos *z;
+  int e = errno;
+  if (!__symtab && !__isworker) {
+    if (_weaken(__zipos_get) && (z = _weaken(__zipos_get)())) {
+      if ((__symtab = GetSymbolTableFromZip(z))) {
+        __symtab->names =
+            (uint32_t *)((char *)__symtab + __symtab->names_offset);
+        __symtab->name_base =
+            (char *)((char *)__symtab + __symtab->name_base_offset);
+      }
+    }
+    if (!__symtab) {
+      __symtab = GetSymbolTableFromElf();
+    }
+  }
+  errno = e;
+}
+
 /**
  * Returns symbol table singleton.
  *
@@ -121,24 +140,7 @@ static struct SymbolTable *GetSymbolTableFromElf(void) {
  * @return symbol table, or NULL if not found
  */
 struct SymbolTable *GetSymbolTable(void) {
-  struct Zipos *z;
-  if (pthread_spin_trylock(&g_lock))
-    return 0;
-  int e = errno;
-  if (!__symtab && !__isworker) {
-    if (_weaken(__zipos_get) && (z = _weaken(__zipos_get)())) {
-      if ((__symtab = GetSymbolTableFromZip(z))) {
-        __symtab->names =
-            (uint32_t *)((char *)__symtab + __symtab->names_offset);
-        __symtab->name_base =
-            (char *)((char *)__symtab + __symtab->name_base_offset);
-      }
-    }
-    if (!__symtab) {
-      __symtab = GetSymbolTableFromElf();
-    }
-  }
-  errno = e;
-  pthread_spin_unlock(&g_lock);
+  static atomic_uint once;
+  cosmo_once(&once, GetSymbolTableInit);
   return __symtab;
 }
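
Hoisting the body into GetSymbolTableInit() and gating it with cosmo_once() also fixes a quirk: the old trylock version returned NULL to any caller who raced with initialization. A once-gate reduces to a tri-state atomic; a minimal sketch of the idea (cosmopolitan's real cosmo_once() may be implemented differently):

```c
#include <stdatomic.h>

// 0 = not started, 1 = running, 2 = done. Callers either win the race
// and run init(), or spin until the winner finishes; nobody returns
// before initialization is complete.
void once_sketch(atomic_uint *once, void init(void)) {
  unsigned expect = 0;
  if (atomic_compare_exchange_strong_explicit(once, &expect, 1,
                                              memory_order_acquire,
                                              memory_order_acquire)) {
    init();
    atomic_store_explicit(once, 2, memory_order_release);
  } else {
    while (atomic_load_explicit(once, memory_order_acquire) != 2) {
      // another thread is initializing; wait for its release store
    }
  }
}
```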

View file

@@ -55,9 +55,14 @@ static void __stdio_fork_parent(void) {
 static void __stdio_fork_child(void) {
   FILE *f;
-  for (int i = __fflush.handles.i; i--;)
+  for (int i = __fflush.handles.i; i--;) {
+    pthread_mutexattr_t attr;
+    pthread_mutexattr_init(&attr);
+    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
     if ((f = __fflush.handles.p[i]))
-      f->lock = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
+      pthread_mutex_init(&f->lock, &attr);
+    pthread_mutexattr_destroy(&attr);
+  }
   pthread_mutex_init(&__fflush_lock_obj, 0);
 }
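
The child no longer struct-assigns PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP but rebuilds each FILE lock with the portable mutexattr sequence. The recursive type matters because stdio lets the owning thread re-enter its own FILE lock (flockfile-style nesting). Standard usage of that sequence, with hypothetical names:

```c
#include <pthread.h>

static pthread_mutex_t file_lock;  // stand-in for a FILE's lock

void init_recursive(void) {
  pthread_mutexattr_t attr;
  pthread_mutexattr_init(&attr);
  pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
  pthread_mutex_init(&file_lock, &attr);
  pthread_mutexattr_destroy(&attr);  // attr may be destroyed after init
}

void nested(void) {
  pthread_mutex_lock(&file_lock);
  pthread_mutex_lock(&file_lock);    // same owner: depth 2, no deadlock
  pthread_mutex_unlock(&file_lock);
  pthread_mutex_unlock(&file_lock);  // fully released here
}
```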

View file

@@ -38,8 +38,12 @@
  * @see pthread_spin_init
  */
 errno_t pthread_spin_lock(pthread_spinlock_t *spin) {
-  while (atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire)) {
-    pthread_pause_np();
+  for (;;) {
+    if (!atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire))
+      break;
+    for (;;)
+      if (!atomic_load_explicit(&spin->_lock, memory_order_relaxed))
+        break;
   }
   return 0;
 }
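
And this is the user-visible spin lock itself, rewritten in the same test-and-test-and-set shape as __maps_lock() and the mutex fallback. Callers need no changes; typical usage of the now-faster lock (hypothetical counter example):

```c
#include <pthread.h>

static pthread_spinlock_t spin;
static long counter;

void counter_setup(void) {
  pthread_spin_init(&spin, PTHREAD_PROCESS_PRIVATE);
}

void counter_bump(void) {
  pthread_spin_lock(&spin);  // contended waiters now spin read-only
  counter++;
  pthread_spin_unlock(&spin);
}
```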