Make pthread mutexes more scalable

pthread_mutex_lock() now uses a better algorithm which goes much faster
in multithreaded environments that have lock contention. This comes at
the cost of adding some fixed-cost overhead to mutex invocations. That
doesn't matter for Cosmopolitan because our core libraries all encode
locking operations as NOP instructions when in single-threaded mode.
Overhead only applies starting the moment you first call clone().
This commit is contained in:
Justine Tunney 2022-09-05 13:06:34 -07:00
parent 7de2f229a7
commit 7ff0ea8c13
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
32 changed files with 410 additions and 112 deletions

View file

@ -19,7 +19,7 @@
#include "libc/assert.h"
#include "libc/calls/internal.h"
#include "libc/dce.h"
#include "libc/intrin/spinlock.h"
#include "libc/intrin/pthread.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/stdio/lcg.internal.h"
@ -35,12 +35,12 @@
textwindows int __sample_pids(int pids[hasatleast 64],
int64_t handles[hasatleast 64],
bool exploratory) {
static char lock;
static uint64_t rando = 1;
static pthread_spinlock_t lock;
uint32_t i, j, base, count;
if (__threaded) _spinlock(&lock);
if (__threaded) pthread_spin_lock(&lock);
base = KnuthLinearCongruentialGenerator(&rando) >> 32;
_spunlock(&lock);
pthread_spin_unlock(&lock);
for (count = i = 0; i < g_fds.n; ++i) {
j = (base + i) % g_fds.n;
if (g_fds.p[j].kind == kFdProcess && (!exploratory || !g_fds.p[j].zombie)) {