Make threads faster and more reliable

This change doubles the performance of thread spawning. That's thanks to
our new stack manager, which allows us to avoid zeroing stacks. It gives
us 15µs spawns rather than 30µs spawns on Linux. Also, pthread_exit() is
faster now, since it doesn't need to acquire the pthread GIL. On NetBSD,
that helps us avoid allocating too many semaphores. Even if that happens
we're now able to survive semaphores running out and even memory running
out, when allocating *NSYNC waiter objects. I found a lot more rare bugs
in the POSIX threads runtime that could cause things to crash, if you've
got dozens of threads all spawning and joining dozens of threads. I want
cosmo to be world-class and production-worthy for 2025, so happy holidays, all.
This commit is contained in:
Justine Tunney 2024-12-18 04:59:02 -08:00
parent 906bd06a5a
commit 624573207e
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
51 changed files with 1006 additions and 321 deletions

View file

@ -17,12 +17,12 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/ulock.h"
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/syscall_support-sysv.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/strace.h"
#include "libc/intrin/ulock.h"
// XNU futexes
// https://opensource.apple.com/source/xnu/xnu-7195.50.7.100.1/bsd/sys/ulock.h.auto.html
@ -32,6 +32,26 @@ int sys_ulock_wait(uint32_t operation, void *addr, uint64_t value,
uint32_t timeout_micros) asm("sys_futex_cp");
// returns number of other waiters, or -1 w/ errno
//
// - EINTR means a signal handler was called. This is how we support
// things like POSIX thread cancelation.
//
// - EFAULT if XNU couldn't read `addr`. This is normally considered a
// programming error, but with ulock it can actually be a transient
// error due to low memory conditions. Apple recommends retrying.
//
// - ENOMEM means XNU wasn't able to allocate memory for kernel internal
// data structures. Apple doesn't provide any advice on what to do. We
// simply turn this into EAGAIN.
//
// - EAGAIN if XNU told us EFAULT but cosmo believes the address exists.
// This value is also used as a substitute for ENOMEM.
//
// - EINVAL could mean operation is invalid, addr is null or misaligned;
// it could also mean another thread calling ulock on this address was
// configured (via operation) in an inconsistent way.
//
// see also os_sync_wait_on_address.h from xcode sdk
int ulock_wait(uint32_t operation, void *addr, uint64_t value,
uint32_t timeout_micros) {
int rc;
@ -39,12 +59,26 @@ int ulock_wait(uint32_t operation, void *addr, uint64_t value,
// trace the request up front; the result is printed as "..." because it
// isn't known until the system call returns
LOCKTRACE("ulock_wait(%#x, %p, %lx, %u) → ...", operation, addr, value,
timeout_micros);
// sys_ulock_wait is bound through its asm label to the sys_futex_cp symbol
rc = sys_ulock_wait(operation, addr, value, timeout_micros);
// on failure, fold the errno values we treat as transient into EAGAIN
if (rc == -1) {
// XNU couldn't allocate memory for its internal data structures
if (errno == ENOMEM)
errno = EAGAIN;
// EFAULT is only remapped when kisdangerous() doesn't flag addr, i.e.
// the address looks valid to us and the fault is treated as transient;
// an address that is flagged keeps EFAULT
if (errno == EFAULT)
if (!kisdangerous(addr))
errno = EAGAIN;
}
// trace the outcome, after any errno remapping above
// NOTE(review): `% m` presumably renders the errno text - confirm
LOCKTRACE("ulock_wait(%#x, %p, %lx, %u) → %d% m", operation, addr, value,
timeout_micros, rc);
return rc;
}
// returns -errno
//
// - ENOENT means there wasn't anyone to wake
//
// - EINVAL could mean operation is invalid, addr is null or misaligned;
// it could also mean another thread calling ulock on this address was
// configured (via operation) in an inconsistent way.
//
int ulock_wake(uint32_t operation, void *addr, uint64_t wake_value) {
int rc;
rc = __syscall3i(operation, (long)addr, wake_value, 0x2000000 | 516);