Make pthread mutexes more scalable

pthread_mutex_lock() now uses a more scalable algorithm that is much
faster in multithreaded programs under lock contention. This comes at
the cost of a small fixed overhead on every mutex operation. That
doesn't matter for Cosmopolitan, because our core libraries all encode
locking operations as NOP instructions while in single-threaded mode;
the overhead only applies from the moment you first call clone().
This commit is contained in:
Justine Tunney 2022-09-05 13:06:34 -07:00
parent 7de2f229a7
commit 7ff0ea8c13
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
32 changed files with 410 additions and 112 deletions

View file

@ -1,5 +1,6 @@
// clang-format off
#include "libc/calls/calls.h"
#include "libc/intrin/pthread.h"
#include "libc/nexgen32e/threaded.h"
/* --------------------------- Lock preliminaries ------------------------ */
@ -50,6 +51,15 @@
/* #define TRY_LOCK(lk) ... */
/* static MLOCK_T malloc_global_mutex = ... */
/*
 * Lock primitives for the allocator, implemented with pthread mutexes.
 *
 * Every locking macro is gated on `__threaded`: the pthread call is only
 * evaluated when `__threaded` is nonzero.
 * NOTE(review): `__threaded` comes from libc/nexgen32e/threaded.h and is
 * presumably nonzero only once the process has created a thread; confirm
 * against that header.
 */

/* Lock object type used by the macros below. */
#define MLOCK_T pthread_mutex_t

/* Locks `lk` when `__threaded` is nonzero. The result of
   pthread_mutex_lock() is discarded by the comma operator, so the
   expression always evaluates to 0. */
#define ACQUIRE_LOCK(lk) (__threaded && pthread_mutex_lock(lk), 0)

/* Unlocks `lk` when `__threaded` is nonzero. As with ACQUIRE_LOCK, the
   pthread result is discarded and the expression always evaluates to 0. */
#define RELEASE_LOCK(lk) (__threaded && pthread_mutex_unlock(lk), 0)

/* Evaluates to 1 if the lock was obtained and 0 otherwise. When
   `__threaded` is zero, the mutex is not touched and the result is 1. */
#define TRY_LOCK(lk) (__threaded ? !pthread_mutex_trylock(lk) : 1)

/* Initializes `lk`; the 0 attribute argument requests default mutex
   attributes. Not gated on `__threaded`. */
#define INITIAL_LOCK(lk) pthread_mutex_init(lk, 0)

/* Destroys `lk`. Not gated on `__threaded`. */
#define DESTROY_LOCK(lk) pthread_mutex_destroy(lk)

/* File-scope mutex with no explicit initializer here.
   NOTE(review): presumably set up via INITIAL_LOCK before first use;
   confirm at the call site. */
static MLOCK_T malloc_global_mutex;
#elif USE_SPIN_LOCKS
/* First, define CAS_LOCK and CLEAR_LOCK on ints */