Make pthread mutexes more scalable

pthread_mutex_lock() now uses a better algorithm that runs much faster in multithreaded environments with lock contention. This comes at the cost of adding some fixed overhead to every mutex invocation. That doesn't matter for Cosmopolitan, because our core libraries all encode locking operations as NOP instructions when in single-threaded mode. The overhead only applies from the moment you first call clone().
2025-07-17 16:10:29 +00:00 · 2022-09-05 13:06:34 -07:00 · 2022-09-05 13:06:34 -07:00 · 7ff0ea8c13
commit 7ff0ea8c13
parent 7de2f229a7
32 changed files with 410 additions and 112 deletions
--- a/libc/calls/getloadavg-nt.c
+++ b/libc/calls/getloadavg-nt.c
@@ -20,7 +20,7 @@
 #include "libc/calls/syscall_support-nt.internal.h"
 #include "libc/dce.h"
 #include "libc/fmt/conv.h"
-#include "libc/intrin/spinlock.h"
+#include "libc/intrin/pthread.h"
 #include "libc/macros.internal.h"
 #include "libc/nt/accounting.h"
 #include "libc/runtime/sysconf.h"
@@ -29,14 +29,14 @@

 static int cpus;
 static double load;
-_Alignas(64) static char lock;
+static pthread_spinlock_t lock;
 static struct NtFileTime idle1, kern1, user1;

 textwindows int sys_getloadavg_nt(double *a, int n) {
  int i, rc;
  uint64_t elapsed, used;
  struct NtFileTime idle, kern, user;
-  _spinlock(&lock);
+  pthread_spin_lock(&lock);
  if (GetSystemTimes(&idle, &kern, &user)) {
    elapsed = (FT(kern) - FT(kern1)) + (FT(user) - FT(user1));
    if (elapsed) {
@@ -52,7 +52,7 @@ textwindows int sys_getloadavg_nt(double *a, int n) {
  } else {
    rc = __winerr();
  }
-  _spunlock(&lock);
+  pthread_spin_unlock(&lock);
  return rc;
 }