Reduce pthread memory usage

This commit is contained in:
Justine Tunney 2024-12-24 10:30:59 -08:00
parent ec2db4e40e
commit 93e22c581f
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
6 changed files with 75 additions and 58 deletions

View file

@ -64,7 +64,7 @@ static struct CosmoStacks cosmo_stacks = {
};
static struct CosmoStacksConfig cosmo_stacks_config = {
.maxstacks = 16,
.maxstacks = 3,
};
void cosmo_stack_lock(void) {
@ -169,7 +169,7 @@ int cosmo_stack_getmaxstacks(void) {
*
* Please note this limit only applies to stacks that aren't in use.
*
* Your default is sixteen stacks may be cached at any given moment.
* By default, three stacks may be cached at any given moment.
*
* If `maxstacks` is less than the current cache size, then surplus
* entries will be evicted and freed before this function returns.
@ -292,10 +292,10 @@ errno_t cosmo_stack_free(void *stackaddr, unsigned stacksize,
return EINVAL;
if ((uintptr_t)stackaddr & (__gransize - 1))
return EINVAL;
cosmo_once(&cosmo_stacks.once, cosmo_stack_setup);
cosmo_stack_lock();
struct Dll *surplus = 0;
if (cosmo_stacks_config.maxstacks) {
cosmo_once(&cosmo_stacks.once, cosmo_stack_setup);
surplus = cosmo_stack_decimate(cosmo_stacks_config.maxstacks - 1);
struct CosmoStack *ts = 0;
if (dll_is_empty(cosmo_stacks.objects))

View file

@ -114,6 +114,8 @@ static void fork_prepare(void) {
fork_prepare_stdio();
__localtime_lock();
__dlopen_lock();
if (_weaken(cosmo_stack_lock))
_weaken(cosmo_stack_lock)();
__cxa_lock();
__gdtoa_lock1();
__gdtoa_lock();
@ -121,16 +123,12 @@ static void fork_prepare(void) {
dlmalloc_pre_fork();
__fds_lock();
_pthread_mutex_lock(&__rand64_lock_obj);
if (_weaken(cosmo_stack_lock))
_weaken(cosmo_stack_lock)();
__maps_lock();
LOCKTRACE("READY TO LOCK AND ROLL");
}
static void fork_parent(void) {
__maps_unlock();
if (_weaken(cosmo_stack_unlock))
_weaken(cosmo_stack_unlock)();
_pthread_mutex_unlock(&__rand64_lock_obj);
__fds_unlock();
dlmalloc_post_fork_parent();
@ -138,6 +136,8 @@ static void fork_parent(void) {
__gdtoa_unlock();
__gdtoa_unlock1();
__cxa_unlock();
if (_weaken(cosmo_stack_unlock))
_weaken(cosmo_stack_unlock)();
__dlopen_unlock();
__localtime_unlock();
fork_parent_stdio();
@ -148,8 +148,6 @@ static void fork_parent(void) {
static void fork_child(void) {
nsync_mu_semaphore_sem_fork_child();
if (_weaken(cosmo_stack_wipe))
_weaken(cosmo_stack_wipe)();
_pthread_mutex_wipe_np(&__dlopen_lock_obj);
_pthread_mutex_wipe_np(&__rand64_lock_obj);
_pthread_mutex_wipe_np(&__fds_lock_obj);
@ -159,6 +157,8 @@ static void fork_child(void) {
fork_child_stdio();
_pthread_mutex_wipe_np(&__pthread_lock_obj);
_pthread_mutex_wipe_np(&__cxa_lock_obj);
if (_weaken(cosmo_stack_wipe))
_weaken(cosmo_stack_wipe)();
_pthread_mutex_wipe_np(&__localtime_lock_obj);
if (IsWindows()) {
// we don't bother locking the proc/itimer/sig locks above since
@ -204,11 +204,11 @@ int _fork(uint32_t dwCreationFlags) {
struct CosmoTib *tib = __get_tls();
struct PosixThread *pt = (struct PosixThread *)tib->tib_pthread;
tid = IsLinux() || IsXnuSilicon() ? dx : sys_gettid();
atomic_store_explicit(&tib->tib_tid, tid, memory_order_relaxed);
atomic_store_explicit(&pt->ptid, tid, memory_order_relaxed);
atomic_init(&tib->tib_tid, tid);
atomic_init(&pt->ptid, tid);
// tracing and kisdangerous need this lock wiped a little earlier
atomic_store_explicit(&__maps.lock.word, 0, memory_order_relaxed);
atomic_init(&__maps.lock.word, 0);
/*
* it's now safe to call normal functions again
@ -218,14 +218,10 @@ int _fork(uint32_t dwCreationFlags) {
// we can't free() them since we're monopolizing all locks
// we assume the operating system already reclaimed system handles
dll_remove(&_pthread_list, &pt->list);
for (e = dll_first(_pthread_list); e; e = dll_next(_pthread_list, e)) {
atomic_store_explicit(&POSIXTHREAD_CONTAINER(e)->pt_status,
kPosixThreadZombie, memory_order_relaxed);
atomic_store_explicit(&POSIXTHREAD_CONTAINER(e)->tib->tib_syshand, 0,
memory_order_relaxed);
}
struct Dll *old_threads = _pthread_list;
_pthread_list = 0;
dll_make_first(&_pthread_list, &pt->list);
atomic_store_explicit(&_pthread_count, 1, memory_order_relaxed);
atomic_init(&_pthread_count, 1);
// get new system thread handle
intptr_t syshand = 0;
@ -236,16 +232,27 @@ int _fork(uint32_t dwCreationFlags) {
GetCurrentProcess(), &syshand, 0, false,
kNtDuplicateSameAccess);
}
atomic_store_explicit(&tib->tib_syshand, syshand, memory_order_relaxed);
atomic_init(&tib->tib_syshand, syshand);
// we can't be canceled if the canceler no longer exists
atomic_store_explicit(&pt->pt_canceled, false, memory_order_relaxed);
atomic_init(&pt->pt_canceled, false);
// forget locks
memset(tib->tib_locks, 0, sizeof(tib->tib_locks));
// run user fork callbacks
fork_child();
// free threads
if (_weaken(_pthread_free)) {
while ((e = dll_first(old_threads))) {
pt = POSIXTHREAD_CONTAINER(e);
atomic_init(&pt->tib->tib_syshand, 0);
dll_remove(&old_threads, e);
_weaken(_pthread_free)(pt);
}
}
STRACE("fork() → 0 (child of %d; took %ld us)", parent, micros);
} else {
// this is the parent process

View file

@ -535,7 +535,7 @@ static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz,
wt = (struct CloneArgs *)sp;
sp = AlignStack(sp, stk, stksz, 16);
tid = atomic_fetch_add_explicit(&tids, 1, memory_order_acq_rel);
wt->this = tid = (tid & (kMaxThreadIds - 1)) + kMinThreadId;
wt->this = tid = (tid % kMaxThreadIds) + kMinThreadId;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->tls = flags & CLONE_SETTLS ? tls : 0;
@ -550,9 +550,9 @@ static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz,
unassert(!__syslib->__pthread_attr_init(attr));
unassert(!__syslib->__pthread_attr_setguardsize(attr, 0));
unassert(!__syslib->__pthread_attr_setstacksize(attr, babystack));
if (!(res = __syslib->__pthread_create(&th, attr, SiliconThreadMain, wt)) &&
(flags & CLONE_PARENT_SETTID)) {
*ptid = tid;
if (!(res = __syslib->__pthread_create(&th, attr, SiliconThreadMain, wt))) {
if (flags & CLONE_PARENT_SETTID)
*ptid = tid;
if (flags & CLONE_SETTLS) {
struct CosmoTib *tib = tls;
atomic_store_explicit(&tib[-1].tib_syshand, th, memory_order_release);

View file

@ -94,7 +94,7 @@ struct PosixThread {
typedef void (*atfork_f)(void);
extern struct Dll *_pthread_list;
extern _Atomic(unsigned) _pthread_count;
extern atomic_uint _pthread_count;
extern struct PosixThread _pthread_static;
extern _Atomic(pthread_key_dtor) _pthread_key_dtor[PTHREAD_KEYS_MAX];
@ -109,6 +109,7 @@ int _pthread_tid(struct PosixThread *) libcesque;
intptr_t _pthread_syshand(struct PosixThread *) libcesque;
long _pthread_cancel_ack(void) libcesque;
void _pthread_decimate(void) libcesque;
void _pthread_free(struct PosixThread *) libcesque paramsnonnull();
void _pthread_lock(void) libcesque;
void _pthread_onfork_child(void) libcesque;
void _pthread_onfork_parent(void) libcesque;

View file

@ -67,7 +67,7 @@ __static_yoink("_pthread_onfork_prepare");
__static_yoink("_pthread_onfork_parent");
__static_yoink("_pthread_onfork_child");
static void _pthread_free(struct PosixThread *pt) {
void _pthread_free(struct PosixThread *pt) {
// thread must be removed from _pthread_list before calling
unassert(dll_is_alone(&pt->list) && &pt->list != _pthread_list);
@ -84,7 +84,7 @@ static void _pthread_free(struct PosixThread *pt) {
// free any additional upstream system resources
// our fork implementation wipes this handle in child automatically
uint64_t syshand =
atomic_load_explicit(&pt->tib->tib_syshand, memory_order_acquire);
atomic_load_explicit(&pt->tib->tib_syshand, memory_order_relaxed);
if (syshand) {
if (IsWindows())
unassert(CloseHandle(syshand)); // non-inheritable

View file

@ -62,33 +62,34 @@ static const char *DescribeReturnValue(char buf[30], int err, void **value) {
* @cancelationpoint
*/
static errno_t _pthread_wait(atomic_int *ctid, struct timespec *abstime) {
int x, e;
errno_t err = 0;
if (ctid == &__get_tls()->tib_tid) {
// "If an implementation detects that the value specified by the
// thread argument to pthread_join() refers to the calling thread,
// it is recommended that the function should fail and report an
// [EDEADLK] error." ──Quoth POSIX.1-2017
err = EDEADLK;
} else {
// "If the thread calling pthread_join() is canceled, then the target
// thread shall not be detached." ──Quoth POSIX.1-2017
if (!(err = pthread_testcancel_np())) {
BEGIN_CANCELATION_POINT;
while ((x = atomic_load_explicit(ctid, memory_order_acquire))) {
e = cosmo_futex_wait(ctid, x, !IsWindows() && !IsXnu(), CLOCK_REALTIME,
// "If an implementation detects that the value specified by the
// thread argument to pthread_join() refers to the calling thread,
// it is recommended that the function should fail and report an
// [EDEADLK] error." ──Quoth POSIX.1-2017
if (ctid == &__get_tls()->tib_tid)
return EDEADLK;
// "If the thread calling pthread_join() is canceled, then the target
// thread shall not be detached." ──Quoth POSIX.1-2017
errno_t err;
if ((err = pthread_testcancel_np()))
return err;
BEGIN_CANCELATION_POINT;
int x;
while ((x = atomic_load_explicit(ctid, memory_order_acquire))) {
int e = cosmo_futex_wait(ctid, x, !IsWindows() && !IsXnu(), CLOCK_REALTIME,
abstime);
if (e == -ECANCELED) {
err = ECANCELED;
break;
} else if (e == -ETIMEDOUT) {
err = EBUSY;
break;
}
}
END_CANCELATION_POINT;
if (e == -ECANCELED) {
err = ECANCELED;
break;
} else if (e == -ETIMEDOUT) {
err = EBUSY;
break;
}
}
END_CANCELATION_POINT;
return err;
}
@ -117,12 +118,11 @@ static errno_t _pthread_wait(atomic_int *ctid, struct timespec *abstime) {
errno_t pthread_timedjoin_np(pthread_t thread, void **value_ptr,
struct timespec *abstime) {
int tid;
errno_t err = 0;
errno_t err;
struct PosixThread *pt;
enum PosixThreadStatus status;
pt = (struct PosixThread *)thread;
unassert(thread);
_pthread_ref(pt);
// "The behavior is undefined if the value specified by the thread
// argument to pthread_join() does not refer to a joinable thread."
@ -135,14 +135,23 @@ errno_t pthread_timedjoin_np(pthread_t thread, void **value_ptr,
// specifying the same target thread are undefined."
// ──Quoth POSIX.1-2017
if (!(err = _pthread_wait(&pt->tib->tib_tid, abstime))) {
atomic_store_explicit(&pt->pt_status, kPosixThreadZombie,
memory_order_release);
_pthread_zombify(pt);
if (value_ptr)
*value_ptr = pt->pt_val;
if (atomic_load_explicit(&pt->pt_refs, memory_order_acquire)) {
_pthread_lock();
dll_remove(&_pthread_list, &pt->list);
dll_make_last(&_pthread_list, &pt->list);
atomic_store_explicit(&pt->pt_status, kPosixThreadZombie,
memory_order_release);
_pthread_unlock();
} else {
_pthread_lock();
dll_remove(&_pthread_list, &pt->list);
_pthread_unlock();
_pthread_free(pt);
}
}
_pthread_unref(pt);
STRACE("pthread_timedjoin_np(%d, %s, %s) → %s", tid,
DescribeReturnValue(alloca(30), err, value_ptr),
DescribeTimespec(err ? -1 : 0, abstime), DescribeErrno(err));