Reduce pthread memory usage

This commit is contained in:
Justine Tunney 2024-12-24 10:30:59 -08:00
parent ec2db4e40e
commit 93e22c581f
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
6 changed files with 75 additions and 58 deletions

View file

@ -64,7 +64,7 @@ static struct CosmoStacks cosmo_stacks = {
}; };
static struct CosmoStacksConfig cosmo_stacks_config = { static struct CosmoStacksConfig cosmo_stacks_config = {
.maxstacks = 16, .maxstacks = 3,
}; };
void cosmo_stack_lock(void) { void cosmo_stack_lock(void) {
@ -169,7 +169,7 @@ int cosmo_stack_getmaxstacks(void) {
* *
* Please note this limit only applies to stacks that aren't in use. * Please note this limit only applies to stacks that aren't in use.
* *
* By default, up to sixteen stacks may be cached at any given moment. * By default, up to three stacks may be cached at any given moment.
* *
* If `maxstacks` is less than the current cache size, then surplus * If `maxstacks` is less than the current cache size, then surplus
* entries will be evicted and freed before this function returns. * entries will be evicted and freed before this function returns.
@ -292,10 +292,10 @@ errno_t cosmo_stack_free(void *stackaddr, unsigned stacksize,
return EINVAL; return EINVAL;
if ((uintptr_t)stackaddr & (__gransize - 1)) if ((uintptr_t)stackaddr & (__gransize - 1))
return EINVAL; return EINVAL;
cosmo_once(&cosmo_stacks.once, cosmo_stack_setup);
cosmo_stack_lock(); cosmo_stack_lock();
struct Dll *surplus = 0; struct Dll *surplus = 0;
if (cosmo_stacks_config.maxstacks) { if (cosmo_stacks_config.maxstacks) {
cosmo_once(&cosmo_stacks.once, cosmo_stack_setup);
surplus = cosmo_stack_decimate(cosmo_stacks_config.maxstacks - 1); surplus = cosmo_stack_decimate(cosmo_stacks_config.maxstacks - 1);
struct CosmoStack *ts = 0; struct CosmoStack *ts = 0;
if (dll_is_empty(cosmo_stacks.objects)) if (dll_is_empty(cosmo_stacks.objects))

View file

@ -114,6 +114,8 @@ static void fork_prepare(void) {
fork_prepare_stdio(); fork_prepare_stdio();
__localtime_lock(); __localtime_lock();
__dlopen_lock(); __dlopen_lock();
if (_weaken(cosmo_stack_lock))
_weaken(cosmo_stack_lock)();
__cxa_lock(); __cxa_lock();
__gdtoa_lock1(); __gdtoa_lock1();
__gdtoa_lock(); __gdtoa_lock();
@ -121,16 +123,12 @@ static void fork_prepare(void) {
dlmalloc_pre_fork(); dlmalloc_pre_fork();
__fds_lock(); __fds_lock();
_pthread_mutex_lock(&__rand64_lock_obj); _pthread_mutex_lock(&__rand64_lock_obj);
if (_weaken(cosmo_stack_lock))
_weaken(cosmo_stack_lock)();
__maps_lock(); __maps_lock();
LOCKTRACE("READY TO LOCK AND ROLL"); LOCKTRACE("READY TO LOCK AND ROLL");
} }
static void fork_parent(void) { static void fork_parent(void) {
__maps_unlock(); __maps_unlock();
if (_weaken(cosmo_stack_unlock))
_weaken(cosmo_stack_unlock)();
_pthread_mutex_unlock(&__rand64_lock_obj); _pthread_mutex_unlock(&__rand64_lock_obj);
__fds_unlock(); __fds_unlock();
dlmalloc_post_fork_parent(); dlmalloc_post_fork_parent();
@ -138,6 +136,8 @@ static void fork_parent(void) {
__gdtoa_unlock(); __gdtoa_unlock();
__gdtoa_unlock1(); __gdtoa_unlock1();
__cxa_unlock(); __cxa_unlock();
if (_weaken(cosmo_stack_unlock))
_weaken(cosmo_stack_unlock)();
__dlopen_unlock(); __dlopen_unlock();
__localtime_unlock(); __localtime_unlock();
fork_parent_stdio(); fork_parent_stdio();
@ -148,8 +148,6 @@ static void fork_parent(void) {
static void fork_child(void) { static void fork_child(void) {
nsync_mu_semaphore_sem_fork_child(); nsync_mu_semaphore_sem_fork_child();
if (_weaken(cosmo_stack_wipe))
_weaken(cosmo_stack_wipe)();
_pthread_mutex_wipe_np(&__dlopen_lock_obj); _pthread_mutex_wipe_np(&__dlopen_lock_obj);
_pthread_mutex_wipe_np(&__rand64_lock_obj); _pthread_mutex_wipe_np(&__rand64_lock_obj);
_pthread_mutex_wipe_np(&__fds_lock_obj); _pthread_mutex_wipe_np(&__fds_lock_obj);
@ -159,6 +157,8 @@ static void fork_child(void) {
fork_child_stdio(); fork_child_stdio();
_pthread_mutex_wipe_np(&__pthread_lock_obj); _pthread_mutex_wipe_np(&__pthread_lock_obj);
_pthread_mutex_wipe_np(&__cxa_lock_obj); _pthread_mutex_wipe_np(&__cxa_lock_obj);
if (_weaken(cosmo_stack_wipe))
_weaken(cosmo_stack_wipe)();
_pthread_mutex_wipe_np(&__localtime_lock_obj); _pthread_mutex_wipe_np(&__localtime_lock_obj);
if (IsWindows()) { if (IsWindows()) {
// we don't bother locking the proc/itimer/sig locks above since // we don't bother locking the proc/itimer/sig locks above since
@ -204,11 +204,11 @@ int _fork(uint32_t dwCreationFlags) {
struct CosmoTib *tib = __get_tls(); struct CosmoTib *tib = __get_tls();
struct PosixThread *pt = (struct PosixThread *)tib->tib_pthread; struct PosixThread *pt = (struct PosixThread *)tib->tib_pthread;
tid = IsLinux() || IsXnuSilicon() ? dx : sys_gettid(); tid = IsLinux() || IsXnuSilicon() ? dx : sys_gettid();
atomic_store_explicit(&tib->tib_tid, tid, memory_order_relaxed); atomic_init(&tib->tib_tid, tid);
atomic_store_explicit(&pt->ptid, tid, memory_order_relaxed); atomic_init(&pt->ptid, tid);
// tracing and kisdangerous need this lock wiped a little earlier // tracing and kisdangerous need this lock wiped a little earlier
atomic_store_explicit(&__maps.lock.word, 0, memory_order_relaxed); atomic_init(&__maps.lock.word, 0);
/* /*
* it's now safe to call normal functions again * it's now safe to call normal functions again
@ -218,14 +218,10 @@ int _fork(uint32_t dwCreationFlags) {
// we can't free() them since we're monopolizing all locks // we can't free() them since we're monopolizing all locks
// we assume the operating system already reclaimed system handles // we assume the operating system already reclaimed system handles
dll_remove(&_pthread_list, &pt->list); dll_remove(&_pthread_list, &pt->list);
for (e = dll_first(_pthread_list); e; e = dll_next(_pthread_list, e)) { struct Dll *old_threads = _pthread_list;
atomic_store_explicit(&POSIXTHREAD_CONTAINER(e)->pt_status, _pthread_list = 0;
kPosixThreadZombie, memory_order_relaxed);
atomic_store_explicit(&POSIXTHREAD_CONTAINER(e)->tib->tib_syshand, 0,
memory_order_relaxed);
}
dll_make_first(&_pthread_list, &pt->list); dll_make_first(&_pthread_list, &pt->list);
atomic_store_explicit(&_pthread_count, 1, memory_order_relaxed); atomic_init(&_pthread_count, 1);
// get new system thread handle // get new system thread handle
intptr_t syshand = 0; intptr_t syshand = 0;
@ -236,16 +232,27 @@ int _fork(uint32_t dwCreationFlags) {
GetCurrentProcess(), &syshand, 0, false, GetCurrentProcess(), &syshand, 0, false,
kNtDuplicateSameAccess); kNtDuplicateSameAccess);
} }
atomic_store_explicit(&tib->tib_syshand, syshand, memory_order_relaxed); atomic_init(&tib->tib_syshand, syshand);
// we can't be canceled if the canceler no longer exists // we can't be canceled if the canceler no longer exists
atomic_store_explicit(&pt->pt_canceled, false, memory_order_relaxed); atomic_init(&pt->pt_canceled, false);
// forget locks // forget locks
memset(tib->tib_locks, 0, sizeof(tib->tib_locks)); memset(tib->tib_locks, 0, sizeof(tib->tib_locks));
// run user fork callbacks // run user fork callbacks
fork_child(); fork_child();
// free threads
if (_weaken(_pthread_free)) {
while ((e = dll_first(old_threads))) {
pt = POSIXTHREAD_CONTAINER(e);
atomic_init(&pt->tib->tib_syshand, 0);
dll_remove(&old_threads, e);
_weaken(_pthread_free)(pt);
}
}
STRACE("fork() → 0 (child of %d; took %ld us)", parent, micros); STRACE("fork() → 0 (child of %d; took %ld us)", parent, micros);
} else { } else {
// this is the parent process // this is the parent process

View file

@ -535,7 +535,7 @@ static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz,
wt = (struct CloneArgs *)sp; wt = (struct CloneArgs *)sp;
sp = AlignStack(sp, stk, stksz, 16); sp = AlignStack(sp, stk, stksz, 16);
tid = atomic_fetch_add_explicit(&tids, 1, memory_order_acq_rel); tid = atomic_fetch_add_explicit(&tids, 1, memory_order_acq_rel);
wt->this = tid = (tid & (kMaxThreadIds - 1)) + kMinThreadId; wt->this = tid = (tid % kMaxThreadIds) + kMinThreadId;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->tls = flags & CLONE_SETTLS ? tls : 0; wt->tls = flags & CLONE_SETTLS ? tls : 0;
@ -550,8 +550,8 @@ static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz,
unassert(!__syslib->__pthread_attr_init(attr)); unassert(!__syslib->__pthread_attr_init(attr));
unassert(!__syslib->__pthread_attr_setguardsize(attr, 0)); unassert(!__syslib->__pthread_attr_setguardsize(attr, 0));
unassert(!__syslib->__pthread_attr_setstacksize(attr, babystack)); unassert(!__syslib->__pthread_attr_setstacksize(attr, babystack));
if (!(res = __syslib->__pthread_create(&th, attr, SiliconThreadMain, wt)) && if (!(res = __syslib->__pthread_create(&th, attr, SiliconThreadMain, wt))) {
(flags & CLONE_PARENT_SETTID)) { if (flags & CLONE_PARENT_SETTID)
*ptid = tid; *ptid = tid;
if (flags & CLONE_SETTLS) { if (flags & CLONE_SETTLS) {
struct CosmoTib *tib = tls; struct CosmoTib *tib = tls;

View file

@ -94,7 +94,7 @@ struct PosixThread {
typedef void (*atfork_f)(void); typedef void (*atfork_f)(void);
extern struct Dll *_pthread_list; extern struct Dll *_pthread_list;
extern _Atomic(unsigned) _pthread_count; extern atomic_uint _pthread_count;
extern struct PosixThread _pthread_static; extern struct PosixThread _pthread_static;
extern _Atomic(pthread_key_dtor) _pthread_key_dtor[PTHREAD_KEYS_MAX]; extern _Atomic(pthread_key_dtor) _pthread_key_dtor[PTHREAD_KEYS_MAX];
@ -109,6 +109,7 @@ int _pthread_tid(struct PosixThread *) libcesque;
intptr_t _pthread_syshand(struct PosixThread *) libcesque; intptr_t _pthread_syshand(struct PosixThread *) libcesque;
long _pthread_cancel_ack(void) libcesque; long _pthread_cancel_ack(void) libcesque;
void _pthread_decimate(void) libcesque; void _pthread_decimate(void) libcesque;
void _pthread_free(struct PosixThread *) libcesque paramsnonnull();
void _pthread_lock(void) libcesque; void _pthread_lock(void) libcesque;
void _pthread_onfork_child(void) libcesque; void _pthread_onfork_child(void) libcesque;
void _pthread_onfork_parent(void) libcesque; void _pthread_onfork_parent(void) libcesque;

View file

@ -67,7 +67,7 @@ __static_yoink("_pthread_onfork_prepare");
__static_yoink("_pthread_onfork_parent"); __static_yoink("_pthread_onfork_parent");
__static_yoink("_pthread_onfork_child"); __static_yoink("_pthread_onfork_child");
static void _pthread_free(struct PosixThread *pt) { void _pthread_free(struct PosixThread *pt) {
// thread must be removed from _pthread_list before calling // thread must be removed from _pthread_list before calling
unassert(dll_is_alone(&pt->list) && &pt->list != _pthread_list); unassert(dll_is_alone(&pt->list) && &pt->list != _pthread_list);
@ -84,7 +84,7 @@ static void _pthread_free(struct PosixThread *pt) {
// free any additional upstream system resources // free any additional upstream system resources
// our fork implementation wipes this handle in child automatically // our fork implementation wipes this handle in child automatically
uint64_t syshand = uint64_t syshand =
atomic_load_explicit(&pt->tib->tib_syshand, memory_order_acquire); atomic_load_explicit(&pt->tib->tib_syshand, memory_order_relaxed);
if (syshand) { if (syshand) {
if (IsWindows()) if (IsWindows())
unassert(CloseHandle(syshand)); // non-inheritable unassert(CloseHandle(syshand)); // non-inheritable

View file

@ -62,21 +62,24 @@ static const char *DescribeReturnValue(char buf[30], int err, void **value) {
* @cancelationpoint * @cancelationpoint
*/ */
static errno_t _pthread_wait(atomic_int *ctid, struct timespec *abstime) { static errno_t _pthread_wait(atomic_int *ctid, struct timespec *abstime) {
int x, e;
errno_t err = 0;
if (ctid == &__get_tls()->tib_tid) {
// "If an implementation detects that the value specified by the // "If an implementation detects that the value specified by the
// thread argument to pthread_join() refers to the calling thread, // thread argument to pthread_join() refers to the calling thread,
// it is recommended that the function should fail and report an // it is recommended that the function should fail and report an
// [EDEADLK] error." ──Quoth POSIX.1-2017 // [EDEADLK] error." ──Quoth POSIX.1-2017
err = EDEADLK; if (ctid == &__get_tls()->tib_tid)
} else { return EDEADLK;
// "If the thread calling pthread_join() is canceled, then the target // "If the thread calling pthread_join() is canceled, then the target
// thread shall not be detached." ──Quoth POSIX.1-2017 // thread shall not be detached." ──Quoth POSIX.1-2017
if (!(err = pthread_testcancel_np())) { errno_t err;
if ((err = pthread_testcancel_np()))
return err;
BEGIN_CANCELATION_POINT; BEGIN_CANCELATION_POINT;
int x;
while ((x = atomic_load_explicit(ctid, memory_order_acquire))) { while ((x = atomic_load_explicit(ctid, memory_order_acquire))) {
e = cosmo_futex_wait(ctid, x, !IsWindows() && !IsXnu(), CLOCK_REALTIME, int e = cosmo_futex_wait(ctid, x, !IsWindows() && !IsXnu(), CLOCK_REALTIME,
abstime); abstime);
if (e == -ECANCELED) { if (e == -ECANCELED) {
err = ECANCELED; err = ECANCELED;
@ -87,8 +90,6 @@ static errno_t _pthread_wait(atomic_int *ctid, struct timespec *abstime) {
} }
} }
END_CANCELATION_POINT; END_CANCELATION_POINT;
}
}
return err; return err;
} }
@ -117,12 +118,11 @@ static errno_t _pthread_wait(atomic_int *ctid, struct timespec *abstime) {
errno_t pthread_timedjoin_np(pthread_t thread, void **value_ptr, errno_t pthread_timedjoin_np(pthread_t thread, void **value_ptr,
struct timespec *abstime) { struct timespec *abstime) {
int tid; int tid;
errno_t err = 0; errno_t err;
struct PosixThread *pt; struct PosixThread *pt;
enum PosixThreadStatus status; enum PosixThreadStatus status;
pt = (struct PosixThread *)thread; pt = (struct PosixThread *)thread;
unassert(thread); unassert(thread);
_pthread_ref(pt);
// "The behavior is undefined if the value specified by the thread // "The behavior is undefined if the value specified by the thread
// argument to pthread_join() does not refer to a joinable thread." // argument to pthread_join() does not refer to a joinable thread."
@ -135,14 +135,23 @@ errno_t pthread_timedjoin_np(pthread_t thread, void **value_ptr,
// specifying the same target thread are undefined." // specifying the same target thread are undefined."
// ──Quoth POSIX.1-2017 // ──Quoth POSIX.1-2017
if (!(err = _pthread_wait(&pt->tib->tib_tid, abstime))) { if (!(err = _pthread_wait(&pt->tib->tib_tid, abstime))) {
atomic_store_explicit(&pt->pt_status, kPosixThreadZombie,
memory_order_release);
_pthread_zombify(pt);
if (value_ptr) if (value_ptr)
*value_ptr = pt->pt_val; *value_ptr = pt->pt_val;
if (atomic_load_explicit(&pt->pt_refs, memory_order_acquire)) {
_pthread_lock();
dll_remove(&_pthread_list, &pt->list);
dll_make_last(&_pthread_list, &pt->list);
atomic_store_explicit(&pt->pt_status, kPosixThreadZombie,
memory_order_release);
_pthread_unlock();
} else {
_pthread_lock();
dll_remove(&_pthread_list, &pt->list);
_pthread_unlock();
_pthread_free(pt);
}
} }
_pthread_unref(pt);
STRACE("pthread_timedjoin_np(%d, %s, %s) → %s", tid, STRACE("pthread_timedjoin_np(%d, %s, %s) → %s", tid,
DescribeReturnValue(alloca(30), err, value_ptr), DescribeReturnValue(alloca(30), err, value_ptr),
DescribeTimespec(err ? -1 : 0, abstime), DescribeErrno(err)); DescribeTimespec(err ? -1 : 0, abstime), DescribeErrno(err));