mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 03:27:39 +00:00
Make spin locks go faster
This commit is contained in:
parent
a31d5ea399
commit
c8e25d811c
16 changed files with 150 additions and 123 deletions
|
@ -30,15 +30,27 @@
|
|||
|
||||
// assigned in ntinitload() as MAX(1, __get_cpu_count() / 2)
static int cpus;
|
||||
// NOTE(review): no use of `load` is visible in this excerpt; presumably the
// most recently computed load figure — confirm against the full file
static double load;
|
||||
// spin lock taken around the sample in sys_getloadavg_nt()
// NOTE(review): looks like the removed side of this diff, superseded by
// getloadavg_lock below — confirm
static pthread_spinlock_t lock;
|
||||
// previous GetSystemTimes() sample; seeded in ntinitload() and subtracted
// from the fresh sample in sys_getloadavg_nt()
static struct NtFileTime idle1, kern1, user1;
|
||||
// mutex operated on by __getloadavg_lock/__getloadavg_unlock/__getloadavg_wipe
static pthread_mutex_t getloadavg_lock;
|
||||
|
||||
// Locks getloadavg_lock. Called by sys_getloadavg_nt() and also passed as the
// first (prepare) handler to pthread_atfork() in ntinitload().
static void __getloadavg_lock(void) {
|
||||
pthread_mutex_lock(&getloadavg_lock);
|
||||
}
|
||||
|
||||
// Unlocks getloadavg_lock. Called by sys_getloadavg_nt() and also passed as
// the second (parent) handler to pthread_atfork() in ntinitload().
static void __getloadavg_unlock(void) {
|
||||
pthread_mutex_unlock(&getloadavg_lock);
|
||||
}
|
||||
|
||||
// Re-initializes getloadavg_lock with default attributes (attr argument 0).
// Called once from ntinitload() and passed as the third (child) handler to
// pthread_atfork() there.
static void __getloadavg_wipe(void) {
|
||||
pthread_mutex_init(&getloadavg_lock, 0);
|
||||
}
|
||||
|
||||
textwindows int sys_getloadavg_nt(double *a, int n) {
|
||||
int i, rc;
|
||||
uint64_t elapsed, used;
|
||||
struct NtFileTime idle, kern, user;
|
||||
BLOCK_SIGNALS;
|
||||
pthread_spin_lock(&lock);
|
||||
__getloadavg_lock();
|
||||
if (GetSystemTimes(&idle, &kern, &user)) {
|
||||
elapsed = (FT(kern) - FT(kern1)) + (FT(user) - FT(user1));
|
||||
if (elapsed) {
|
||||
|
@ -54,7 +66,7 @@ textwindows int sys_getloadavg_nt(double *a, int n) {
|
|||
} else {
|
||||
rc = __winerr();
|
||||
}
|
||||
pthread_spin_unlock(&lock);
|
||||
__getloadavg_unlock();
|
||||
ALLOW_SIGNALS;
|
||||
return rc;
|
||||
}
|
||||
|
@ -65,5 +77,7 @@ __attribute__((__constructor__(40))) static textstartup void ntinitload(void) {
|
|||
cpus = __get_cpu_count() / 2;
|
||||
cpus = MAX(1, cpus);
|
||||
GetSystemTimes(&idle1, &kern1, &user1);
|
||||
pthread_atfork(__getloadavg_lock, __getloadavg_unlock, __getloadavg_wipe);
|
||||
__getloadavg_wipe();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,6 +51,7 @@
|
|||
#include "libc/sysv/consts/sicode.h"
|
||||
#include "libc/sysv/consts/ss.h"
|
||||
#include "libc/thread/posixthread.internal.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#ifdef __x86_64__
|
||||
|
||||
/**
|
||||
|
@ -64,6 +65,20 @@ struct SignalFrame {
|
|||
ucontext_t ctx;
|
||||
};
|
||||
|
||||
// mutex operated on by __sig_lock/__sig_unlock/__sig_wipe
static pthread_mutex_t __sig_lock_obj;
|
||||
|
||||
// Re-initializes __sig_lock_obj with default attributes (attr argument 0).
// Called from __sig_init() and passed there as the third (child) handler to
// pthread_atfork().
static void __sig_wipe(void) {
|
||||
pthread_mutex_init(&__sig_lock_obj, 0);
|
||||
}
|
||||
|
||||
// Locks __sig_lock_obj. __sig_killer() takes it before SuspendThread(), and
// __sig_init() passes it as the first (prepare) handler to pthread_atfork().
static void __sig_lock(void) {
|
||||
pthread_mutex_lock(&__sig_lock_obj);
|
||||
}
|
||||
|
||||
// Unlocks __sig_lock_obj. __sig_killer() releases it on each exit path after
// the suspend/get-context sequence, and __sig_init() passes it as the second
// (parent) handler to pthread_atfork().
static void __sig_unlock(void) {
|
||||
pthread_mutex_unlock(&__sig_lock_obj);
|
||||
}
|
||||
|
||||
static textwindows bool __sig_ignored_by_default(int sig) {
|
||||
return sig == SIGURG || //
|
||||
sig == SIGCONT || //
|
||||
|
@ -318,11 +333,10 @@ static textwindows int __sig_killer(struct PosixThread *pt, int sig, int sic) {
|
|||
// take control of thread
|
||||
// suspending the thread happens asynchronously
|
||||
// however getting the context blocks until it's frozen
|
||||
static pthread_spinlock_t killer_lock;
|
||||
pthread_spin_lock(&killer_lock);
|
||||
__sig_lock();
|
||||
if (SuspendThread(th) == -1u) {
|
||||
STRACE("SuspendThread failed w/ %d", GetLastError());
|
||||
pthread_spin_unlock(&killer_lock);
|
||||
__sig_unlock();
|
||||
return ESRCH;
|
||||
}
|
||||
struct NtContext nc;
|
||||
|
@ -330,10 +344,10 @@ static textwindows int __sig_killer(struct PosixThread *pt, int sig, int sic) {
|
|||
if (!GetThreadContext(th, &nc)) {
|
||||
STRACE("GetThreadContext failed w/ %d", GetLastError());
|
||||
ResumeThread(th);
|
||||
pthread_spin_unlock(&killer_lock);
|
||||
__sig_unlock();
|
||||
return ESRCH;
|
||||
}
|
||||
pthread_spin_unlock(&killer_lock);
|
||||
__sig_unlock();
|
||||
|
||||
// we can't preempt threads that masked sig or are blocked
|
||||
// we can't preempt threads that are running in win32 code
|
||||
|
@ -612,6 +626,8 @@ __attribute__((__constructor__(10))) textstartup void __sig_init(void) {
|
|||
return;
|
||||
AddVectoredExceptionHandler(true, (void *)__sig_crash);
|
||||
SetConsoleCtrlHandler((void *)__sig_console, true);
|
||||
pthread_atfork(__sig_lock, __sig_unlock, __sig_wipe);
|
||||
__sig_wipe();
|
||||
}
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
|
|
@ -35,4 +35,5 @@ void __cxa_unlock(void) {
|
|||
|
||||
// Constructor (priority 60): registers __cxa_lock, __cxa_unlock and
// __cxa_wipe as the prepare, parent and child fork handlers, then calls
// __cxa_wipe() once directly.
__attribute__((__constructor__(60))) static textstartup void __cxa_init() {
|
||||
pthread_atfork(__cxa_lock, __cxa_unlock, __cxa_wipe);
|
||||
__cxa_wipe();
|
||||
}
|
||||
|
|
|
@ -86,27 +86,15 @@ void __maps_init(void) {
|
|||
|
||||
privileged bool __maps_lock(void) {
|
||||
struct CosmoTib *tib;
|
||||
if (!__tls_enabled)
|
||||
if (__tls_enabled)
|
||||
return false;
|
||||
tib = __get_tls_privileged();
|
||||
if (atomic_fetch_add_explicit(&tib->tib_relock_maps, 1, memory_order_relaxed))
|
||||
return true;
|
||||
int backoff = 0;
|
||||
while (atomic_exchange_explicit(&__maps.lock, 1, memory_order_acquire)) {
|
||||
if (backoff < 7) {
|
||||
volatile int i;
|
||||
for (i = 0; i != 1 << backoff; i++) {
|
||||
}
|
||||
backoff++;
|
||||
} else {
|
||||
// STRACE("pthread_delay_np(__maps)");
|
||||
#if defined(__GNUC__) && defined(__aarch64__)
|
||||
__asm__ volatile("yield");
|
||||
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
|
||||
__asm__ volatile("pause");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
while (atomic_exchange_explicit(&__maps.lock, 1, memory_order_acquire))
|
||||
for (;;)
|
||||
if (!atomic_load_explicit(&__maps.lock, memory_order_relaxed))
|
||||
break;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -16,21 +16,11 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/atomic.h"
|
||||
#include "libc/calls/state.internal.h"
|
||||
#include "libc/cosmo.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/atomic.h"
|
||||
#include "libc/intrin/dll.h"
|
||||
#include "libc/intrin/strace.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/proc/proc.internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/thread/posixthread.internal.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "libc/thread/tls.h"
|
||||
|
||||
struct AtFork {
|
||||
struct AtFork *p[2];
|
||||
|
@ -38,16 +28,16 @@ struct AtFork {
|
|||
};
|
||||
|
||||
// Registry of fork handlers: a lock, the head of a linked list of AtFork
// records, and a fixed pool those records are taken from.
// NOTE(review): this span is a diff rendering with both sides interleaved.
// The spinlock, pool[64], atomic_int and plain `} _atforks;` lines appear
// to be the removed side; the mutex, pool[256], int and initializer lines
// the added side — confirm against the repository.
static struct AtForks {
|
||||
pthread_spinlock_t lock;
|
||||
pthread_mutex_t lock;
|
||||
// most recently registered record; _pthread_atfork() pushes at the head
struct AtFork *list;
|
||||
struct AtFork pool[64];
|
||||
atomic_int allocated;
|
||||
} _atforks;
|
||||
struct AtFork pool[256];
|
||||
// number of pool slots handed out so far
int allocated;
|
||||
} _atforks = {
|
||||
// initializes the first member (the lock)
PTHREAD_MUTEX_INITIALIZER,
|
||||
};
|
||||
|
||||
static void _pthread_onfork(int i, const char *op) {
|
||||
struct AtFork *a;
|
||||
if (!i)
|
||||
pthread_spin_lock(&_atforks.lock);
|
||||
for (a = _atforks.list; a; a = a->p[!i]) {
|
||||
if (a->f[i]) {
|
||||
STRACE("pthread_atfork(%s, %t)", op, a->f[i]);
|
||||
|
@ -55,47 +45,41 @@ static void _pthread_onfork(int i, const char *op) {
|
|||
}
|
||||
_atforks.list = a;
|
||||
}
|
||||
if (i)
|
||||
pthread_spin_unlock(&_atforks.lock);
|
||||
}
|
||||
|
||||
// Locks _atforks.lock and then runs _pthread_onfork(0, "prepare").
// The lock is not released here: _pthread_onfork_parent() unlocks it and
// _pthread_onfork_child() re-initializes it.
void _pthread_onfork_prepare(void) {
|
||||
pthread_mutex_lock(&_atforks.lock);
|
||||
_pthread_onfork(0, "prepare");
|
||||
}
|
||||
|
||||
// Runs _pthread_onfork(1, "parent") and then unlocks _atforks.lock, which
// _pthread_onfork_prepare() left locked.
void _pthread_onfork_parent(void) {
|
||||
_pthread_onfork(1, "parent");
|
||||
pthread_mutex_unlock(&_atforks.lock);
|
||||
}
|
||||
|
||||
// Re-initializes _atforks.lock with default attributes (rather than
// unlocking it) and then runs _pthread_onfork(2, "child").
void _pthread_onfork_child(void) {
|
||||
pthread_mutex_init(&_atforks.lock, 0);
|
||||
_pthread_onfork(2, "child");
|
||||
}
|
||||
|
||||
// Hands out the next unused slot of _atforks.pool, or 0 once the pool is
// used up. The relaxed load skips the increment when the pool is already
// full; the fetch-add result is range-checked again because another caller
// may have taken the last slot in between.
// NOTE(review): appears to be on the removed side of this diff (the other
// _pthread_atfork lines index the pool directly) — confirm.
static struct AtFork *_pthread_atfork_alloc(void) {
|
||||
int i, n = ARRAYLEN(_atforks.pool);
|
||||
if (atomic_load_explicit(&_atforks.allocated, memory_order_relaxed) < n &&
|
||||
(i = atomic_fetch_add(&_atforks.allocated, 1)) < n) {
|
||||
return _atforks.pool + i;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Registers a prepare/parent/child handler triple by taking a record from
// _atforks.pool and pushing it at the head of _atforks.list.
// Returns 0 on success, or ENOMEM when no pool slot is left.
// NOTE(review): this span is a diff rendering with both sides interleaved.
// The _pthread_atfork_alloc()/pthread_spin_* lines appear to be the removed
// side; the pthread_mutex_*/`rc` lines the added side — confirm.
int _pthread_atfork(atfork_f prepare, atfork_f parent, atfork_f child) {
|
||||
int rc;
|
||||
struct AtFork *a;
|
||||
if (!(a = _pthread_atfork_alloc()))
|
||||
return ENOMEM;
|
||||
pthread_mutex_lock(&_atforks.lock);
|
||||
if (_atforks.allocated < ARRAYLEN(_atforks.pool)) {
|
||||
a = &_atforks.pool[_atforks.allocated++];
|
||||
// f[0], f[1], f[2] hold the prepare, parent and child callbacks
a->f[0] = prepare;
|
||||
a->f[1] = parent;
|
||||
a->f[2] = child;
|
||||
pthread_spin_lock(&_atforks.lock);
|
||||
// link the new record in front of the current head: p[1] points at the
// old head and the old head's p[0] points back at the new record
a->p[0] = 0;
|
||||
a->p[1] = _atforks.list;
|
||||
if (_atforks.list)
|
||||
_atforks.list->p[0] = a;
|
||||
_atforks.list = a;
|
||||
pthread_spin_unlock(&_atforks.lock);
|
||||
rc = 0;
|
||||
} else {
|
||||
rc = ENOMEM;
|
||||
}
|
||||
pthread_mutex_unlock(&_atforks.lock);
|
||||
return rc;
|
||||
}
|
||||
|
|
|
@ -31,17 +31,16 @@
|
|||
#include "third_party/nsync/futex.internal.h"
|
||||
#include "third_party/nsync/mu.h"
|
||||
|
||||
// Spin-based lock path, used by pthread_mutex_lock_impl() when
// nsync_futex_wait_ is not linked (see its _weaken() test).
// NOTE(review): this span is a diff rendering with both sides interleaved.
// The pthread_mutex_lock_naive header and the MUTEX_UNLOCK/MUTEX_LOCK
// compare-exchange lines appear to be the removed side; the
// pthread_mutex_lock_spin header and the exchange/load loops the added
// side — confirm.
static void pthread_mutex_lock_naive(pthread_mutex_t *mutex, uint64_t word) {
|
||||
static void pthread_mutex_lock_spin(atomic_int *word) {
|
||||
int backoff = 0;
|
||||
uint64_t lock;
|
||||
for (;;) {
|
||||
word = MUTEX_UNLOCK(word);
|
||||
lock = MUTEX_LOCK(word);
|
||||
if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
|
||||
memory_order_acquire,
|
||||
memory_order_relaxed))
|
||||
return;
|
||||
backoff = pthread_delay_np(mutex, backoff);
|
||||
// the lock is ours if the acquiring exchange saw 0 in *word
if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
|
||||
break;
|
||||
// otherwise poll with relaxed loads, delaying between polls, until the
// word reads 0, then retry the exchange above
for (;;) {
|
||||
if (!atomic_load_explicit(word, memory_order_relaxed))
|
||||
break;
|
||||
backoff = pthread_delay_np(word, backoff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -96,8 +95,13 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
|
|||
mutex->_pid = __pid;
|
||||
return 0;
|
||||
}
|
||||
for (;;) {
|
||||
word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
|
||||
if (!MUTEX_LOCKED(word))
|
||||
break;
|
||||
backoff = pthread_delay_np(mutex, backoff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
|
||||
|
@ -121,7 +125,7 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
|
|||
if (_weaken(nsync_futex_wait_)) {
|
||||
pthread_mutex_lock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
|
||||
} else {
|
||||
pthread_mutex_lock_naive(mutex, word);
|
||||
pthread_mutex_lock_spin(&mutex->_futex);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -27,14 +27,8 @@
|
|||
#include "third_party/nsync/futex.internal.h"
|
||||
#include "third_party/nsync/mu.h"
|
||||
|
||||
// Non-blocking lock attempt for the spin path: returns 0 when the lock was
// taken and EBUSY when it was already held.
// NOTE(review): this span is a diff rendering with both sides interleaved.
// The pthread_mutex_trylock_naive header and compare-exchange lines appear
// to be the removed side; the pthread_mutex_trylock_spin header and the
// exchange line the added side — confirm.
static errno_t pthread_mutex_trylock_naive(pthread_mutex_t *mutex,
|
||||
uint64_t word) {
|
||||
uint64_t lock;
|
||||
word = MUTEX_UNLOCK(word);
|
||||
lock = MUTEX_LOCK(word);
|
||||
if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
|
||||
memory_order_acquire,
|
||||
memory_order_relaxed))
|
||||
static errno_t pthread_mutex_trylock_spin(atomic_int *word) {
|
||||
// a previous value of 0 means this call changed the word from 0 to 1
if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
|
||||
return 0;
|
||||
return EBUSY;
|
||||
}
|
||||
|
@ -116,7 +110,7 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
|
|||
if (_weaken(nsync_futex_wait_)) {
|
||||
return pthread_mutex_trylock_drepper(&mutex->_futex);
|
||||
} else {
|
||||
return pthread_mutex_trylock_naive(mutex, word);
|
||||
return pthread_mutex_trylock_spin(&mutex->_futex);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -28,9 +28,8 @@
|
|||
#include "third_party/nsync/futex.internal.h"
|
||||
#include "third_party/nsync/mu.h"
|
||||
|
||||
// Releases the spin-path lock with a release store.
// NOTE(review): this span is a diff rendering with both sides interleaved.
// The pthread_mutex_unlock_naive lines (store of MUTEX_UNLOCK(word) into
// mutex->_word) appear to be the removed side; the
// pthread_mutex_unlock_spin lines (store of 0 into *word) the added
// side — confirm.
static void pthread_mutex_unlock_naive(pthread_mutex_t *mutex, uint64_t word) {
|
||||
uint64_t lock = MUTEX_UNLOCK(word);
|
||||
atomic_store_explicit(&mutex->_word, lock, memory_order_release);
|
||||
static void pthread_mutex_unlock_spin(atomic_int *word) {
|
||||
atomic_store_explicit(word, 0, memory_order_release);
|
||||
}
|
||||
|
||||
// see "take 3" algorithm in "futexes are tricky" by ulrich drepper
|
||||
|
@ -102,7 +101,7 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
|
|||
if (_weaken(nsync_futex_wake_)) {
|
||||
pthread_mutex_unlock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
|
||||
} else {
|
||||
pthread_mutex_unlock_naive(mutex, word);
|
||||
pthread_mutex_unlock_spin(&mutex->_futex);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -21,30 +21,46 @@
|
|||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/thread/thread.h"
|
||||
|
||||
// File-scope state for at_quick_exit(): a 32-entry table of registered
// callbacks, the number of entries in use, and the lock guarding both.
// NOTE(review): this span is a diff rendering; `funcs` is listed twice and
// the spinlock plus __at_quick_exit_lockptr lines appear to be the removed
// side, with __quick_exit_lock_obj the added side — confirm.
static void (*funcs[32])(void);
|
||||
static int count;
|
||||
static pthread_spinlock_t lock;
|
||||
pthread_spinlock_t *const __at_quick_exit_lockptr = &lock;
|
||||
static void (*funcs[32])(void);
|
||||
static pthread_mutex_t __quick_exit_lock_obj;
|
||||
|
||||
// Re-initializes __quick_exit_lock_obj with default attributes (attr
// argument 0). Called from __quick_exit_init() and passed there as the
// third (child) handler to pthread_atfork().
static void __quick_exit_wipe(void) {
|
||||
pthread_mutex_init(&__quick_exit_lock_obj, 0);
|
||||
}
|
||||
|
||||
// Locks __quick_exit_lock_obj, the mutex guarding `funcs` and `count`.
static void __quick_exit_lock(void) {
|
||||
pthread_mutex_lock(&__quick_exit_lock_obj);
|
||||
}
|
||||
|
||||
// Unlocks __quick_exit_lock_obj.
static void __quick_exit_unlock(void) {
|
||||
pthread_mutex_unlock(&__quick_exit_lock_obj);
|
||||
}
|
||||
|
||||
// Runs the registered callbacks in reverse registration order (last
// registered first), popping each one off `funcs` before calling it.
// The lock is dropped around every callback invocation and re-taken
// afterwards; it is still held when the loop ends and the function returns.
// NOTE(review): this span is a diff rendering; the pthread_spin_* lines
// appear to be the removed side and the __quick_exit_* calls the added
// side — confirm.
void __funcs_on_quick_exit(void) {
|
||||
void (*func)(void);
|
||||
pthread_spin_lock(&lock);
|
||||
__quick_exit_lock();
|
||||
while (count) {
|
||||
func = funcs[--count];
|
||||
pthread_spin_unlock(&lock);
|
||||
__quick_exit_unlock();
|
||||
func();
|
||||
pthread_spin_lock(&lock);
|
||||
__quick_exit_lock();
|
||||
}
|
||||
}
|
||||
|
||||
// Appends `func` to the callback table under the lock.
// Returns 0 on success, or -1 when all ARRAYLEN(funcs) slots are in use.
// NOTE(review): this span is a diff rendering; the pthread_spin_* lines
// appear to be the removed side and the __quick_exit_* calls the added
// side — confirm.
int at_quick_exit(void func(void)) {
|
||||
int res = 0;
|
||||
pthread_spin_lock(&lock);
|
||||
__quick_exit_lock();
|
||||
if (count == ARRAYLEN(funcs)) {
|
||||
res = -1;
|
||||
} else {
|
||||
funcs[count++] = func;
|
||||
}
|
||||
pthread_spin_unlock(&lock);
|
||||
__quick_exit_unlock();
|
||||
return res;
|
||||
}
|
||||
|
||||
// Constructor (priority 10): registers __quick_exit_lock,
// __quick_exit_unlock and __quick_exit_wipe as the prepare, parent and
// child fork handlers, then calls __quick_exit_wipe() once directly.
__attribute__((__constructor__(10))) textstartup void __quick_exit_init(void) {
|
||||
pthread_atfork(__quick_exit_lock, __quick_exit_unlock, __quick_exit_wipe);
|
||||
__quick_exit_wipe();
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/atomic.h"
|
||||
#include "libc/cosmo.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/promises.h"
|
||||
#include "libc/intrin/strace.h"
|
||||
|
@ -27,14 +29,12 @@
|
|||
#include "libc/runtime/symbols.internal.h"
|
||||
#include "libc/runtime/zipos.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "libc/x/x.h"
|
||||
#include "libc/zip.internal.h"
|
||||
#include "third_party/puff/puff.h"
|
||||
|
||||
__static_yoink("__get_symbol");
|
||||
|
||||
static pthread_spinlock_t g_lock;
|
||||
struct SymbolTable *__symtab; // for kprintf
|
||||
|
||||
static ssize_t GetZipFile(struct Zipos *zipos, const char *name) {
|
||||
|
@ -100,6 +100,25 @@ static struct SymbolTable *GetSymbolTableFromElf(void) {
|
|||
}
|
||||
}
|
||||
|
||||
// Populates the global __symtab if it is still unset and __isworker is
// false. The zip source is tried first and the ELF source is the fallback.
// errno is saved on entry and restored on exit, so failures in the loaders
// do not leak into the caller's errno.
static void GetSymbolTableInit(void) {
|
||||
struct Zipos *z;
|
||||
int e = errno;
|
||||
if (!__symtab && !__isworker) {
|
||||
// __zipos_get is only called when _weaken() yields a non-null reference
if (_weaken(__zipos_get) && (z = _weaken(__zipos_get)())) {
|
||||
if ((__symtab = GetSymbolTableFromZip(z))) {
|
||||
// turn the stored byte offsets into pointers relative to the table base
__symtab->names =
|
||||
(uint32_t *)((char *)__symtab + __symtab->names_offset);
|
||||
__symtab->name_base =
|
||||
(char *)((char *)__symtab + __symtab->name_base_offset);
|
||||
}
|
||||
}
|
||||
if (!__symtab) {
|
||||
__symtab = GetSymbolTableFromElf();
|
||||
}
|
||||
}
|
||||
errno = e;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns symbol table singleton.
|
||||
*
|
||||
|
@ -121,24 +140,7 @@ static struct SymbolTable *GetSymbolTableFromElf(void) {
|
|||
* @return symbol table, or NULL if not found
|
||||
*/
|
||||
// NOTE(review): this span is a diff rendering with both sides interleaved.
// The pthread_spin_trylock(&g_lock) guard and the inline loading code
// appear to be the removed side; the `once` / cosmo_once() lines, which
// delegate to GetSymbolTableInit, appear to be the added side — confirm.
struct SymbolTable *GetSymbolTable(void) {
|
||||
struct Zipos *z;
|
||||
// gives up with a null result when g_lock cannot be taken immediately
if (pthread_spin_trylock(&g_lock))
|
||||
return 0;
|
||||
int e = errno;
|
||||
if (!__symtab && !__isworker) {
|
||||
if (_weaken(__zipos_get) && (z = _weaken(__zipos_get)())) {
|
||||
if ((__symtab = GetSymbolTableFromZip(z))) {
|
||||
__symtab->names =
|
||||
(uint32_t *)((char *)__symtab + __symtab->names_offset);
|
||||
__symtab->name_base =
|
||||
(char *)((char *)__symtab + __symtab->name_base_offset);
|
||||
}
|
||||
}
|
||||
if (!__symtab) {
|
||||
__symtab = GetSymbolTableFromElf();
|
||||
}
|
||||
}
|
||||
errno = e;
|
||||
pthread_spin_unlock(&g_lock);
|
||||
// passes GetSymbolTableInit to cosmo_once() with a function-local guard
static atomic_uint once;
|
||||
cosmo_once(&once, GetSymbolTableInit);
|
||||
return __symtab;
|
||||
}
|
||||
|
|
|
@ -55,9 +55,14 @@ static void __stdio_fork_parent(void) {
|
|||
|
||||
// Fork child handler for stdio: gives every non-null FILE in
// __fflush.handles a fresh recursive lock, then re-initializes
// __fflush_lock_obj with default attributes.
// NOTE(review): this span is a diff rendering; the brace-less `for` header
// and the PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP assignment appear to be
// the removed side, the braced loop with pthread_mutexattr_* calls the
// added side — confirm.
static void __stdio_fork_child(void) {
|
||||
FILE *f;
|
||||
for (int i = __fflush.handles.i; i--;)
|
||||
// walks the handle table from the last used index down to 0
for (int i = __fflush.handles.i; i--;) {
|
||||
pthread_mutexattr_t attr;
|
||||
pthread_mutexattr_init(&attr);
|
||||
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
|
||||
if ((f = __fflush.handles.p[i]))
|
||||
f->lock = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
|
||||
pthread_mutex_init(&f->lock, &attr);
|
||||
pthread_mutexattr_destroy(&attr);
|
||||
}
|
||||
pthread_mutex_init(&__fflush_lock_obj, 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -38,8 +38,12 @@
|
|||
* @see pthread_spin_init
|
||||
*/
|
||||
// Always returns 0; the call only returns once the lock has been taken.
// NOTE(review): this span is a diff rendering with both sides interleaved.
// The `while (...exchange...) { pthread_pause_np();` lines appear to be the
// removed side; the nested `for (;;)` loops the added side — confirm.
errno_t pthread_spin_lock(pthread_spinlock_t *spin) {
|
||||
while (atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire)) {
|
||||
pthread_pause_np();
|
||||
for (;;) {
|
||||
// the lock is ours if the acquiring exchange saw 0 in _lock
if (!atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire))
|
||||
break;
|
||||
// otherwise poll with relaxed loads until _lock reads 0, then retry
// the exchange above
for (;;)
|
||||
if (!atomic_load_explicit(&spin->_lock, memory_order_relaxed))
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
Loading…
Reference in a new issue