Make pthread mutexes more scalable

pthread_mutex_lock() now uses a better algorithm that is much faster in multithreaded environments that have lock contention. This comes at the cost of adding some fixed overhead to every mutex invocation. That doesn't matter for Cosmopolitan, because our core libraries all encode locking operations as NOP instructions when in single-threaded mode. The overhead only applies from the moment you first call clone().
2025-07-06 03:08:31 +00:00 · 2022-09-05 13:06:34 -07:00 · 2022-09-05 13:06:34 -07:00 · 7ff0ea8c13
commit 7ff0ea8c13
parent 7de2f229a7
32 changed files with 410 additions and 112 deletions
--- a/libc/calls/samplepids.c
+++ b/libc/calls/samplepids.c
@@ -19,7 +19,7 @@
 #include "libc/assert.h"
 #include "libc/calls/internal.h"
 #include "libc/dce.h"
-#include "libc/intrin/spinlock.h"
+#include "libc/intrin/pthread.h"
 #include "libc/nexgen32e/threaded.h"
 #include "libc/stdio/lcg.internal.h"

@@ -35,12 +35,12 @@
 textwindows int __sample_pids(int pids[hasatleast 64],
                              int64_t handles[hasatleast 64],
                              bool exploratory) {
-  static char lock;
  static uint64_t rando = 1;
+  static pthread_spinlock_t lock;
  uint32_t i, j, base, count;
-  if (__threaded) _spinlock(&lock);
+  if (__threaded) pthread_spin_lock(&lock);
  base = KnuthLinearCongruentialGenerator(&rando) >> 32;
-  _spunlock(&lock);
+  pthread_spin_unlock(&lock);
  for (count = i = 0; i < g_fds.n; ++i) {
    j = (base + i) % g_fds.n;
    if (g_fds.p[j].kind == kFdProcess && (!exploratory || !g_fds.p[j].zombie)) {