Make pthread mutexes more scalable

pthread_mutex_lock() now uses a better algorithm that runs much faster in multithreaded environments with lock contention. This comes at the cost of adding some fixed overhead to every mutex invocation. That doesn't matter for Cosmopolitan, because our core libraries all encode locking operations as NOP instructions when in single-threaded mode. The overhead only applies from the moment you first call clone().
2025-06-28 07:18:30 +00:00 · 2022-09-05 13:06:34 -07:00 · 2022-09-05 13:06:34 -07:00 · 7ff0ea8c13
commit 7ff0ea8c13
parent 7de2f229a7
32 changed files with 410 additions and 112 deletions
--- a/third_party/dlmalloc/locks.inc
+++ b/third_party/dlmalloc/locks.inc
@@ -1,5 +1,6 @@
 // clang-format off
 #include "libc/calls/calls.h"
+#include "libc/intrin/pthread.h"
 #include "libc/nexgen32e/threaded.h"

 /* --------------------------- Lock preliminaries ------------------------ */
@@ -50,6 +51,15 @@
 /* #define TRY_LOCK(lk) ... */
 /* static MLOCK_T malloc_global_mutex = ... */

+#define MLOCK_T               pthread_mutex_t  /* lock object type: a pthread mutex */
+#define ACQUIRE_LOCK(lk)      (__threaded && pthread_mutex_lock(lk), 0)  /* locks only when __threaded is nonzero; the expression always evaluates to 0 */
+#define RELEASE_LOCK(lk)      (__threaded && pthread_mutex_unlock(lk), 0)  /* unlocks only when __threaded is nonzero; the expression always evaluates to 0 */
+#define TRY_LOCK(lk)          (__threaded ? !pthread_mutex_trylock(lk) : 1)  /* 1 when __threaded is zero; otherwise 1 iff pthread_mutex_trylock() returned 0 */
+#define INITIAL_LOCK(lk)      pthread_mutex_init(lk, 0)  /* initializes lk, passing 0 as the attributes argument; yields pthread_mutex_init()'s result */
+#define DESTROY_LOCK(lk)      pthread_mutex_destroy(lk)  /* yields pthread_mutex_destroy()'s result */
+
+static MLOCK_T malloc_global_mutex;  /* file-scope lock object; no explicit initializer here */
+
 #elif USE_SPIN_LOCKS

 /* First, define CAS_LOCK and CLEAR_LOCK on ints */