Improve multithreading

This commit is contained in:
Justine Tunney 2024-07-21 06:41:30 -07:00
parent d3167126aa
commit 30afd6ddbb
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
38 changed files with 752 additions and 174 deletions

View file

@ -18,6 +18,7 @@
*/
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/sigaltstack.h"
#include "libc/calls/struct/sigset.h"
#include "libc/calls/struct/sigset.internal.h"
#include "libc/calls/syscall-sysv.internal.h"
@ -67,54 +68,104 @@ __static_yoink("_pthread_onfork_child");
#define MAP_ANON_OPENBSD 0x1000
#define MAP_STACK_OPENBSD 0x4000
void _pthread_free(struct PosixThread *pt, bool isfork) {
void _pthread_free(struct PosixThread *pt) {
// thread must be removed from _pthread_list before calling
unassert(dll_is_alone(&pt->list) && &pt->list != _pthread_list);
// do nothing for the one and only magical statical posix thread
if (pt->pt_flags & PT_STATIC)
return;
// unmap stack if the cosmo runtime was responsible for mapping it
if (pt->pt_flags & PT_OWNSTACK)
unassert(!munmap(pt->pt_attr.__stackaddr, pt->pt_attr.__stacksize));
if (!isfork) {
uint64_t syshand =
atomic_load_explicit(&pt->tib->tib_syshand, memory_order_acquire);
if (syshand) {
if (IsWindows())
unassert(CloseHandle(syshand));
else if (IsXnuSilicon())
__syslib->__pthread_join(syshand, 0);
}
// free any additional upstream system resources
// our fork implementation wipes this handle in child automatically
uint64_t syshand =
atomic_load_explicit(&pt->tib->tib_syshand, memory_order_acquire);
if (syshand) {
if (IsWindows())
unassert(CloseHandle(syshand)); // non-inheritable
else if (IsXnuSilicon())
unassert(!__syslib->__pthread_join(syshand, 0));
}
// free heap memory associated with thread
if (pt->pt_flags & PT_OWNSIGALTSTACK)
free(pt->pt_attr.__sigaltstackaddr);
free(pt->pt_tls);
free(pt);
}
void _pthread_decimate(void) {
struct Dll *e;
void _pthread_decimate(bool annihilation_only) {
struct PosixThread *pt;
struct Dll *e, *e2, *list = 0;
enum PosixThreadStatus status;
StartOver:
// acquire posix threads gil
_pthread_lock();
for (e = dll_last(_pthread_list); e; e = dll_prev(_pthread_list, e)) {
// swiftly remove every single zombie
// that isn't being held by a killing thread
for (e = dll_last(_pthread_list); e; e = e2) {
e2 = dll_prev(_pthread_list, e);
pt = POSIXTHREAD_CONTAINER(e);
if (atomic_load_explicit(&pt->pt_refs, memory_order_acquire) > 0)
continue; // pthread_kill() has a lease on this thread
status = atomic_load_explicit(&pt->pt_status, memory_order_acquire);
if (status != kPosixThreadZombie)
break;
if (!atomic_load_explicit(&pt->tib->tib_tid, memory_order_acquire)) {
dll_remove(&_pthread_list, e);
_pthread_unlock();
_pthread_unref(pt);
goto StartOver;
}
break; // zombies only exist at the end of the linked list
if (atomic_load_explicit(&pt->tib->tib_tid, memory_order_acquire))
continue; // undead thread should that'll stop existing soon
dll_remove(&_pthread_list, e);
dll_make_first(&list, e);
}
// code like pthread_exit() needs to call this in order to know if
// it's appropriate to run exit() handlers however we really don't
// want to have a thread exiting block on a bunch of __maps locks!
// therefore we only take action if we'll destroy all but the self
if (annihilation_only)
if (!(_pthread_list == _pthread_list->prev &&
_pthread_list == _pthread_list->next)) {
dll_make_last(&_pthread_list, list);
list = 0;
}
// release posix threads gil
_pthread_unlock();
// now free our thread local batch of zombies
// because death is a release and not a punishment
// this is advantaged by not holding locks over munmap
while ((e = dll_first(list))) {
pt = POSIXTHREAD_CONTAINER(e);
dll_remove(&list, e);
_pthread_free(pt);
}
}
static int PosixThread(void *arg, int tid) {
void *rc;
struct PosixThread *pt = arg;
// setup scheduling
if (pt->pt_attr.__inheritsched == PTHREAD_EXPLICIT_SCHED) {
unassert(_weaken(_pthread_reschedule));
_weaken(_pthread_reschedule)(pt); // yoinked by attribute builder
}
// setup signal stack
if (pt->pt_attr.__sigaltstacksize) {
struct sigaltstack ss;
ss.ss_sp = pt->pt_attr.__sigaltstackaddr;
ss.ss_size = pt->pt_attr.__sigaltstacksize;
ss.ss_flags = 0;
unassert(!sigaltstack(&ss, 0));
}
// set long jump handler so pthread_exit can bring control back here
if (!setjmp(pt->pt_exiter)) {
sigdelset(&pt->pt_attr.__sigmask, SIGTHR);
@ -130,41 +181,45 @@ static int PosixThread(void *arg, int tid) {
// calling pthread_exit() will either jump back here, or call exit
pthread_exit(rc);
}
// avoid signal handler being triggered after we trash our own stack
__sig_block();
// return to clone polyfill which clears tid, wakes futex, and exits
return 0;
}
static int FixupCustomStackOnOpenbsd(pthread_attr_t *attr) {
// OpenBSD: Only permits RSP to occupy memory that's been explicitly
// defined as stack memory. We need to squeeze the provided interval
// in order to successfully call mmap(), which will return EINVAL if
// these calculations should overflow.
size_t n;
uintptr_t x, y;
int e, rc, pagesz;
pagesz = __pagesize;
n = attr->__stacksize;
x = (uintptr_t)attr->__stackaddr;
y = ROUNDUP(x, pagesz);
n -= y - x;
n = ROUNDDOWN(n, pagesz);
e = errno;
if (__sys_mmap((void *)y, n, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_FIXED | MAP_ANON_OPENBSD | MAP_STACK_OPENBSD,
-1, 0, 0) == (void *)y) {
attr->__stackaddr = (void *)y;
attr->__stacksize = n;
return 0;
} else {
rc = errno;
errno = e;
if (rc == EOVERFLOW) {
rc = EINVAL;
}
return rc;
static bool TellOpenbsdThisIsStackMemory(void *addr, size_t size) {
return __sys_mmap(
addr, size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_FIXED | MAP_ANON_OPENBSD | MAP_STACK_OPENBSD, -1,
0, 0) == addr;
}
// OpenBSD only permits RSP to occupy memory that's been explicitly
// defined as stack memory, i.e. `lo <= %rsp < hi` must be the case
static errno_t FixupCustomStackOnOpenbsd(pthread_attr_t *attr) {
// get interval
uintptr_t lo = (uintptr_t)attr->__stackaddr;
uintptr_t hi = lo + attr->__stacksize;
// squeeze interval
lo = (lo + __pagesize - 1) & -__pagesize;
hi = hi & -__pagesize;
// tell os it's stack memory
errno_t olderr = errno;
if (!TellOpenbsdThisIsStackMemory((void *)lo, hi - lo)) {
errno_t err = errno;
errno = olderr;
return err;
}
// update attributes with usable stack address
attr->__stackaddr = (void *)lo;
attr->__stacksize = hi - lo;
return 0;
}
static errno_t pthread_create_impl(pthread_t *thread,
@ -204,7 +259,7 @@ static errno_t pthread_create_impl(pthread_t *thread,
// assume they know what they're doing as much as possible
if (IsOpenbsd()) {
if ((rc = FixupCustomStackOnOpenbsd(&pt->pt_attr))) {
_pthread_free(pt, false);
_pthread_free(pt);
return rc;
}
}
@ -214,21 +269,17 @@ static errno_t pthread_create_impl(pthread_t *thread,
pt->pt_attr.__guardsize = ROUNDUP(pt->pt_attr.__guardsize, pagesize);
pt->pt_attr.__stacksize = pt->pt_attr.__stacksize;
if (pt->pt_attr.__guardsize + pagesize > pt->pt_attr.__stacksize) {
_pthread_free(pt, false);
_pthread_free(pt);
return EINVAL;
}
pt->pt_attr.__stackaddr =
mmap(0, pt->pt_attr.__stacksize, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (pt->pt_attr.__stackaddr != MAP_FAILED) {
if (IsOpenbsd() &&
__sys_mmap(
pt->pt_attr.__stackaddr, pt->pt_attr.__stacksize,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_FIXED | MAP_ANON_OPENBSD | MAP_STACK_OPENBSD,
-1, 0, 0) != pt->pt_attr.__stackaddr) {
notpossible;
}
if (IsOpenbsd())
if (!TellOpenbsdThisIsStackMemory(pt->pt_attr.__stackaddr,
pt->pt_attr.__stacksize))
notpossible;
if (pt->pt_attr.__guardsize)
if (mprotect(pt->pt_attr.__stackaddr, pt->pt_attr.__guardsize,
PROT_NONE | PROT_GUARD))
@ -236,7 +287,7 @@ static errno_t pthread_create_impl(pthread_t *thread,
}
if (!pt->pt_attr.__stackaddr || pt->pt_attr.__stackaddr == MAP_FAILED) {
rc = errno;
_pthread_free(pt, false);
_pthread_free(pt);
errno = e;
if (rc == EINVAL || rc == EOVERFLOW) {
return EINVAL;
@ -247,6 +298,18 @@ static errno_t pthread_create_impl(pthread_t *thread,
pt->pt_flags |= PT_OWNSTACK;
}
// setup signal stack
if (pt->pt_attr.__sigaltstacksize) {
if (!pt->pt_attr.__sigaltstackaddr) {
if (!(pt->pt_attr.__sigaltstackaddr =
malloc(pt->pt_attr.__sigaltstacksize))) {
_pthread_free(pt);
return errno;
}
pt->pt_flags |= PT_OWNSIGALTSTACK;
}
}
// set initial status
pt->tib->tib_pthread = (pthread_t)pt;
atomic_store_explicit(&pt->tib->tib_sigmask, -1, memory_order_relaxed);
@ -264,7 +327,7 @@ static errno_t pthread_create_impl(pthread_t *thread,
memory_order_relaxed);
break;
default:
_pthread_free(pt, false);
_pthread_free(pt);
return EINVAL;
}
@ -284,7 +347,7 @@ static errno_t pthread_create_impl(pthread_t *thread,
_pthread_lock();
dll_remove(&_pthread_list, &pt->list);
_pthread_unlock();
_pthread_free(pt, false);
_pthread_free(pt);
return rc;
}
@ -353,7 +416,7 @@ static const char *DescribeHandle(char buf[12], errno_t err, pthread_t *th) {
errno_t pthread_create(pthread_t *thread, const pthread_attr_t *attr,
void *(*start_routine)(void *), void *arg) {
errno_t err;
_pthread_decimate();
_pthread_decimate(false);
BLOCK_SIGNALS;
err = pthread_create_impl(thread, attr, start_routine, arg, _SigMask);
ALLOW_SIGNALS;