Make pthread mutexes more scalable

pthread_mutex_lock() now uses a better algorithm which goes much faster
in multithreaded environments that have lock contention. This comes at
the cost of adding some fixed-cost overhead to mutex invocations. That
doesn't matter for Cosmopolitan because our core libraries all encode
locking operations as NOP instructions when in single-threaded mode.
Overhead only applies starting the moment you first call clone().
This commit is contained in:
Justine Tunney 2022-09-05 13:06:34 -07:00
parent 7de2f229a7
commit 7ff0ea8c13
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
32 changed files with 410 additions and 112 deletions

View file

@ -16,42 +16,37 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/likely.h"
#include "libc/calls/clock_gettime.internal.h"
#include "libc/intrin/spinlock.h"
#include "libc/calls/struct/timespec.h"
#include "libc/intrin/pthread.h"
#include "libc/nexgen32e/rdtsc.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/nexgen32e/x86feature.h"
#include "libc/sysv/consts/clock.h"
#include "libc/sysv/errfuns.h"
#include "libc/time/clockstonanos.internal.h"
#include "libc/time/time.h"
// State shared by sys_clock_gettime_mono(): a clock_gettime() reading and the
// rdtsc() counter value captured alongside it, which later calls combine to
// derive the current time.
// NOTE(review): `once` is declared twice below (bool and pthread_once_t) next
// to `lock`; this looks like the before/after lines of the diff shown
// together -- confirm which members the final struct keeps.
static struct {
bool once;  // tested with !g_mono.once and set to true by the inline init path
char lock;  // passed to _spinlock() / _spunlock()
pthread_once_t once;  // passed to pthread_once() with sys_clock_gettime_mono_init
uint64_t base;  // rdtsc() value captured at initialization
struct timespec mono;  // clock_gettime(CLOCK_REALTIME) result captured at initialization
} g_mono;
// Captures the reference point used by sys_clock_gettime_mono(): stores the
// CLOCK_REALTIME time in g_mono.mono, then the rdtsc() counter in g_mono.base.
// Handed to pthread_once() by sys_clock_gettime_mono(). The return value of
// clock_gettime() is not checked.
static void sys_clock_gettime_mono_init(void) {
clock_gettime(CLOCK_REALTIME, &g_mono.mono);
g_mono.base = rdtsc();
// NOTE(review): this stores true into g_mono.once, the same object that is
// passed to pthread_once() as its control variable -- confirm this store is
// intended rather than a leftover from the old bool flag.
g_mono.once = true;
}
int sys_clock_gettime_mono(struct timespec *ts) {
// this routine stops being monotonic after 194 years of uptime
uint64_t nanos;
struct timespec res;
if (X86_HAVE(INVTSC)) {
if (__threaded) {
_spinlock(&g_mono.lock);
}
if (UNLIKELY(!g_mono.once)) {
clock_gettime(CLOCK_REALTIME, &g_mono.mono);
g_mono.base = rdtsc();
g_mono.once = true;
}
pthread_once(&g_mono.once, sys_clock_gettime_mono_init);
nanos = ClocksToNanos(rdtsc(), g_mono.base);
res = g_mono.mono;
res.tv_sec += nanos / 1000000000;
res.tv_nsec += nanos % 1000000000;
_spunlock(&g_mono.lock);
*ts = res;
return 0;
} else {

View file

@ -24,7 +24,6 @@
#include "libc/calls/syscall-nt.internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h"
#include "libc/intrin/spinlock.h"
#include "libc/intrin/weaken.h"
#include "libc/sock/syscall_fd.internal.h"
#include "libc/sysv/errfuns.h"

View file

@ -20,7 +20,6 @@
#include "libc/calls/internal.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/spinlock.h"
#include "libc/intrin/weaken.h"
#include "libc/nt/files.h"
#include "libc/nt/runtime.h"

View file

@ -19,7 +19,6 @@
#include "libc/calls/internal.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/spinlock.h"
#include "libc/nt/createfile.h"
#include "libc/nt/enum/fileflagandattributes.h"
#include "libc/nt/files.h"

View file

@ -20,7 +20,7 @@
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/dce.h"
#include "libc/fmt/conv.h"
#include "libc/intrin/spinlock.h"
#include "libc/intrin/pthread.h"
#include "libc/macros.internal.h"
#include "libc/nt/accounting.h"
#include "libc/runtime/sysconf.h"
@ -29,14 +29,14 @@
// File-scope state declared directly above sys_getloadavg_nt(). The uses of
// cpus and load are not visible in this excerpt.
static int cpus;
static double load;
// NOTE(review): `lock` is declared twice below (an aligned char and a
// pthread_spinlock_t); this looks like the before/after lines of the diff
// shown together -- confirm which declaration the final file keeps.
_Alignas(64) static char lock;  // passed to _spinlock() / _spunlock()
static pthread_spinlock_t lock;  // passed to pthread_spin_lock() / pthread_spin_unlock()
// kern1 and user1 are subtracted from fresh GetSystemTimes() readings to get
// the elapsed time; idle1 is presumably the matching idle reading (its use is
// not visible here -- TODO confirm).
static struct NtFileTime idle1, kern1, user1;
textwindows int sys_getloadavg_nt(double *a, int n) {
int i, rc;
uint64_t elapsed, used;
struct NtFileTime idle, kern, user;
_spinlock(&lock);
pthread_spin_lock(&lock);
if (GetSystemTimes(&idle, &kern, &user)) {
elapsed = (FT(kern) - FT(kern1)) + (FT(user) - FT(user1));
if (elapsed) {
@ -52,7 +52,7 @@ textwindows int sys_getloadavg_nt(double *a, int n) {
} else {
rc = __winerr();
}
_spunlock(&lock);
pthread_spin_unlock(&lock);
return rc;
}

View file

@ -22,7 +22,6 @@
#include "libc/calls/struct/rusage.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/fmt/conv.h"
#include "libc/intrin/spinlock.h"
#include "libc/nt/accounting.h"
#include "libc/nt/process.h"
#include "libc/nt/runtime.h"

View file

@ -25,7 +25,6 @@
#include "libc/calls/struct/winsize.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/errno.h"
#include "libc/intrin/spinlock.h"
#include "libc/intrin/weaken.h"
#include "libc/log/log.h"
#include "libc/nt/console.h"

View file

@ -22,7 +22,6 @@
#include "libc/calls/state.internal.h"
#include "libc/calls/syscall-nt.internal.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/spinlock.h"
#include "libc/nt/createfile.h"
#include "libc/nt/enum/filetype.h"
#include "libc/nt/files.h"

View file

@ -19,7 +19,6 @@
#include "libc/calls/internal.h"
#include "libc/calls/state.internal.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/intrin/spinlock.h"
#include "libc/nt/createfile.h"
#include "libc/nt/enum/accessmask.h"
#include "libc/nt/enum/creationdisposition.h"

View file

@ -24,7 +24,6 @@
#include "libc/calls/struct/sigaction.h"
#include "libc/errno.h"
#include "libc/intrin/bits.h"
#include "libc/intrin/spinlock.h"
#include "libc/intrin/weaken.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"

View file

@ -20,7 +20,6 @@
#include "libc/calls/state.internal.h"
#include "libc/calls/struct/fd.internal.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/spinlock.h"
static const char *__fdkind2str(int x) {
switch (x) {

View file

@ -24,7 +24,6 @@
#include "libc/calls/struct/sigset.h"
#include "libc/dce.h"
#include "libc/intrin/cmpxchg.h"
#include "libc/intrin/spinlock.h"
#include "libc/intrin/weaken.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"

View file

@ -19,7 +19,7 @@
#include "libc/assert.h"
#include "libc/calls/internal.h"
#include "libc/dce.h"
#include "libc/intrin/spinlock.h"
#include "libc/intrin/pthread.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/stdio/lcg.internal.h"
@ -35,12 +35,12 @@
textwindows int __sample_pids(int pids[hasatleast 64],
int64_t handles[hasatleast 64],
bool exploratory) {
static char lock;
static uint64_t rando = 1;
static pthread_spinlock_t lock;
uint32_t i, j, base, count;
if (__threaded) _spinlock(&lock);
if (__threaded) pthread_spin_lock(&lock);
base = KnuthLinearCongruentialGenerator(&rando) >> 32;
_spunlock(&lock);
pthread_spin_unlock(&lock);
for (count = i = 0; i < g_fds.n; ++i) {
j = (base + i) % g_fds.n;
if (g_fds.p[j].kind == kFdProcess && (!exploratory || !g_fds.p[j].zombie)) {

View file

@ -18,7 +18,6 @@
*/
#include "libc/calls/sig.internal.h"
#include "libc/calls/state.internal.h"
#include "libc/intrin/spinlock.h"
#include "libc/macros.internal.h"
#include "libc/sysv/consts/sig.h"
#include "libc/sysv/errfuns.h"

View file

@ -23,7 +23,6 @@
#include "libc/calls/struct/siginfo.h"
#include "libc/intrin/cmpxchg.h"
#include "libc/intrin/lockcmpxchg.h"
#include "libc/intrin/spinlock.h"
#include "libc/log/libfatal.internal.h"
#include "libc/macros.internal.h"
#include "libc/runtime/internal.h"

View file

@ -25,7 +25,6 @@
#include "libc/calls/struct/siginfo.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/dce.h"
#include "libc/intrin/spinlock.h"
#include "libc/nt/enum/wait.h"
#include "libc/nt/runtime.h"
#include "libc/nt/synchronization.h"

View file

@ -20,7 +20,6 @@
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/errno.h"
#include "libc/intrin/once.h"
#include "libc/intrin/spinlock.h"
#include "libc/nt/enum/accessmask.h"
#include "libc/nt/enum/fileflagandattributes.h"
#include "libc/nt/enum/symboliclink.h"

View file

@ -25,7 +25,6 @@
#include "libc/calls/struct/rusage.h"
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/fmt/conv.h"
#include "libc/intrin/spinlock.h"
#include "libc/macros.internal.h"
#include "libc/nt/accounting.h"
#include "libc/nt/enum/accessmask.h"