diff --git a/libc/intrin/gettid.c b/libc/intrin/gettid.c index fe30e434a..48c7c9e42 100644 --- a/libc/intrin/gettid.c +++ b/libc/intrin/gettid.c @@ -39,7 +39,7 @@ int gettid(void) { int tid; if (VERY_LIKELY(__tls_enabled && !__vforked)) { - tid = atomic_load_explicit(&__get_tls()->tib_tid, memory_order_relaxed); + tid = atomic_load_explicit(&__get_tls()->tib_ptid, memory_order_relaxed); if (VERY_LIKELY(tid > 0)) return tid; } diff --git a/libc/intrin/kprintf.greg.c b/libc/intrin/kprintf.greg.c index eb70ce94f..a303723c5 100644 --- a/libc/intrin/kprintf.greg.c +++ b/libc/intrin/kprintf.greg.c @@ -561,7 +561,7 @@ ABI static size_t kformat(char *b, size_t n, const char *fmt, va_list va) { tib = __tls_enabled ? __get_tls_privileged() : 0; if (!(tib && (tib->tib_flags & TIB_FLAG_VFORKED))) { if (tib) { - x = atomic_load_explicit(&tib->tib_tid, memory_order_relaxed); + x = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed); } else { x = __pid; } diff --git a/libc/intrin/maps.c b/libc/intrin/maps.c index b95688de3..8a3f0b054 100644 --- a/libc/intrin/maps.c +++ b/libc/intrin/maps.c @@ -129,7 +129,7 @@ bool __maps_held(void) { return __tls_enabled && !(__get_tls()->tib_flags & TIB_FLAG_VFORKED) && MUTEX_OWNER( atomic_load_explicit(&__maps.lock.word, memory_order_relaxed)) == - atomic_load_explicit(&__get_tls()->tib_tid, memory_order_relaxed); + atomic_load_explicit(&__get_tls()->tib_ptid, memory_order_relaxed); } ABI void __maps_lock(void) { @@ -142,7 +142,7 @@ ABI void __maps_lock(void) { return; if (tib->tib_flags & TIB_FLAG_VFORKED) return; - me = atomic_load_explicit(&tib->tib_tid, memory_order_relaxed); + me = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed); if (me <= 0) return; word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed); @@ -192,7 +192,7 @@ ABI void __maps_unlock(void) { return; if (tib->tib_flags & TIB_FLAG_VFORKED) return; - me = atomic_load_explicit(&tib->tib_tid, memory_order_relaxed); + me = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed); if (me <= 0) return; word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed); diff --git a/libc/intrin/pthread_mutex_lock.c b/libc/intrin/pthread_mutex_lock.c index a4447ed41..8ee1daa12 100644 --- a/libc/intrin/pthread_mutex_lock.c +++ b/libc/intrin/pthread_mutex_lock.c @@ -69,7 +69,7 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex, uint64_t word, bool is_trylock) { uint64_t lock; int backoff = 0; - int me = atomic_load_explicit(&__get_tls()->tib_tid, memory_order_relaxed); + int me = atomic_load_explicit(&__get_tls()->tib_ptid, memory_order_relaxed); bool once = false; for (;;) { if (MUTEX_OWNER(word) == me) { @@ -119,7 +119,7 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex, static errno_t pthread_mutex_lock_recursive_nsync(pthread_mutex_t *mutex, uint64_t word, bool is_trylock) { - int me = atomic_load_explicit(&__get_tls()->tib_tid, memory_order_relaxed); + int me = atomic_load_explicit(&__get_tls()->tib_ptid, memory_order_relaxed); for (;;) { if (MUTEX_OWNER(word) == me) { if (MUTEX_DEPTH(word) < MUTEX_DEPTH_MAX) { diff --git a/libc/intrin/pthread_mutex_unlock.c b/libc/intrin/pthread_mutex_unlock.c index f6df0b1aa..25525dccb 100644 --- a/libc/intrin/pthread_mutex_unlock.c +++ b/libc/intrin/pthread_mutex_unlock.c @@ -44,7 +44,7 @@ static void pthread_mutex_unlock_drepper(atomic_int *futex, char pshare) { static errno_t pthread_mutex_unlock_recursive(pthread_mutex_t *mutex, uint64_t word) { - int me = atomic_load_explicit(&__get_tls()->tib_tid, memory_order_relaxed); + int me = atomic_load_explicit(&__get_tls()->tib_ptid, memory_order_relaxed); for (;;) { // we allow unlocking an initialized lock that wasn't locked, but we @@ -76,7 +76,7 @@ static errno_t pthread_mutex_unlock_recursive(pthread_mutex_t *mutex, #if PTHREAD_USE_NSYNC static errno_t pthread_mutex_unlock_recursive_nsync(pthread_mutex_t *mutex, uint64_t word) { - int me = atomic_load_explicit(&__get_tls()->tib_tid, memory_order_relaxed); + int me = atomic_load_explicit(&__get_tls()->tib_ptid, memory_order_relaxed); for (;;) { // we allow unlocking an initialized lock that wasn't locked, but we diff --git a/libc/intrin/pthread_tid.c b/libc/intrin/pthread_tid.c index 4f7553e9a..fb9d22f44 100644 --- a/libc/intrin/pthread_tid.c +++ b/libc/intrin/pthread_tid.c @@ -21,9 +21,25 @@ #include "libc/thread/posixthread.internal.h" #include "libc/thread/thread.h" +// +// - tib_ptid: always guaranteed to be non-zero in thread itself. on +// some platforms (e.g. xnu) the parent thread and other +// threads may need to wait for this value to be set. this +// is generally the value you want to read to get the tid. +// +// - tib_ctid: starts off as -1. once thread starts, it's set to the +// thread's tid before calling the thread callback. when +// thread is done executing, this is set to zero, and then +// this address is futex woken, in case the parent thread or +// any other thread is waiting on its completion. when a +// thread wants to read its own tid, it shouldn't use this, +// because the thread might need to do things after clearing +// its own tib_ctid (see pthread_exit() for static thread). +// int _pthread_tid(struct PosixThread *pt) { int tid = 0; - while (pt && !(tid = atomic_load_explicit(&pt->ptid, memory_order_acquire))) + while (pt && !(tid = atomic_load_explicit(&pt->tib->tib_ptid, + memory_order_acquire))) pthread_yield_np(); return tid; } diff --git a/libc/intrin/wintlsinit.c b/libc/intrin/wintlsinit.c index a678a0d2d..d14798d06 100644 --- a/libc/intrin/wintlsinit.c +++ b/libc/intrin/wintlsinit.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/intrin/atomic.h" #include "libc/log/libfatal.internal.h" #include "libc/nt/thread.h" #include "libc/nt/thunk/msabi.h" @@ -38,7 +39,9 @@ textwindows dontinstrument void __bootstrap_tls(struct CosmoTib *tib, tib->tib_ftrace = __ftrace; tib->tib_sigstack_size = 57344; tib->tib_sigstack_addr = bp - 57344; - tib->tib_tid = __imp_GetCurrentThreadId(); + int tid = __imp_GetCurrentThreadId(); + atomic_init(&tib->tib_ptid, tid); + atomic_init(&tib->tib_ctid, tid); __set_tls_win32(tib); } diff --git a/libc/mem/leaks.c b/libc/mem/leaks.c index ec422cb3b..97febe422 100644 --- a/libc/mem/leaks.c +++ b/libc/mem/leaks.c @@ -79,7 +79,7 @@ void CheckForMemoryLeaks(void) { // validate usage of this api if (_weaken(_pthread_decimate)) - _weaken(_pthread_decimate)(); + _weaken(_pthread_decimate)(kPosixThreadZombie); if (!pthread_orphan_np()) kprintf("warning: called CheckForMemoryLeaks() from non-orphaned thread\n"); diff --git a/libc/proc/fork-nt.c b/libc/proc/fork-nt.c index c42140517..d527e641a 100644 --- a/libc/proc/fork-nt.c +++ b/libc/proc/fork-nt.c @@ -465,9 +465,6 @@ textwindows int sys_fork_nt(uint32_t dwCreationFlags) { // re-apply code morphing for function tracing if (ftrace_stackdigs) _weaken(__hook)(_weaken(ftrace_hook), _weaken(GetSymbolTable)()); - // notify pthread join - atomic_store_explicit(&_pthread_static.ptid, GetCurrentThreadId(), - memory_order_release); } if (rc == -1) dll_make_first(&__proc.free, &proc->elem); diff --git a/libc/proc/fork.c b/libc/proc/fork.c index a90d2f5ef..cefa51fb6 100644 --- a/libc/proc/fork.c +++ b/libc/proc/fork.c @@ -59,7 +59,6 @@ extern pthread_mutex_t __sig_worker_lock; void __dlopen_lock(void); void __dlopen_unlock(void); -void nsync_mu_semaphore_sem_fork_child(void); // first and last and always // it is the lord of all locks @@ -147,7 +146,6 @@ static void fork_parent(void) { } static void fork_child(void) { - nsync_mu_semaphore_sem_fork_child(); _pthread_mutex_wipe_np(&__dlopen_lock_obj); _pthread_mutex_wipe_np(&__rand64_lock_obj); _pthread_mutex_wipe_np(&__fds_lock_obj); @@ -204,8 +202,8 @@ int _fork(uint32_t dwCreationFlags) { struct CosmoTib *tib = __get_tls(); struct PosixThread *pt = (struct PosixThread *)tib->tib_pthread; tid = IsLinux() || IsXnuSilicon() ? dx : sys_gettid(); - atomic_init(&tib->tib_tid, tid); - atomic_init(&pt->ptid, tid); + atomic_init(&tib->tib_ctid, tid); + atomic_init(&tib->tib_ptid, tid); // tracing and kisdangerous need this lock wiped a little earlier atomic_init(&__maps.lock.word, 0); @@ -214,6 +212,11 @@ int _fork(uint32_t dwCreationFlags) { * it's now safe to call normal functions again */ + // this wipe must happen fast + void nsync_waiter_wipe_(void); + if (_weaken(nsync_waiter_wipe_)) + _weaken(nsync_waiter_wipe_)(); + // turn other threads into zombies // we can't free() them since we're monopolizing all locks // we assume the operating system already reclaimed system handles diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c index a3b35c690..da998b3f5 100644 --- a/libc/runtime/clone.c +++ b/libc/runtime/clone.c @@ -120,11 +120,13 @@ WinThreadEntry(int rdi, // rcx int rc; if (wt->tls) __set_tls_win32(wt->tls); - *wt->ctid = __imp_GetCurrentThreadId(); + int tid = __imp_GetCurrentThreadId(); + atomic_init(wt->ptid, tid); + atomic_init(wt->ctid, tid); rc = __stack_call(wt->arg, wt->tid, 0, 0, wt->func, wt->sp); // we can now clear ctid directly since we're no longer using our own // stack memory, which can now be safely free'd by the parent thread. - *wt->ztid = 0; + atomic_store_explicit(wt->ztid, 0, memory_order_release); __imp_WakeByAddressAll(wt->ztid); // since we didn't indirect this function through NT2SYSV() it's not // safe to simply return, and as such, we need ExitThread(). @@ -146,6 +148,7 @@ static textwindows errno_t CloneWindows(int (*func)(void *, int), char *stk, sp &= -alignof(struct CloneArgs); wt = (struct CloneArgs *)sp; wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; + wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->func = func; wt->arg = arg; @@ -154,7 +157,7 @@ static textwindows errno_t CloneWindows(int (*func)(void *, int), char *stk, if ((h = CreateThread(&kNtIsInheritable, 65536, (void *)WinThreadEntry, wt, kNtStackSizeParamIsAReservation, &utid))) { if (flags & CLONE_PARENT_SETTID) - *ptid = utid; + atomic_init(ptid, utid); if (flags & CLONE_SETTLS) { struct CosmoTib *tib = tls; atomic_store_explicit(&tib->tib_syshand, h, memory_order_release); @@ -192,8 +195,8 @@ XnuThreadMain(void *pthread, // rdi int ax; wt->tid = tid; - *wt->ctid = tid; - *wt->ptid = tid; + atomic_init(wt->ctid, tid); + atomic_init(wt->ptid, tid); if (wt->tls) { // XNU uses the same 0x30 offset as the WIN32 TIB x64. They told the @@ -250,8 +253,8 @@ static errno_t CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags, wt = (struct CloneArgs *)sp; // pass parameters to new thread via xnu - wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; + wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->tls = flags & CLONE_SETTLS ? tls : 0; return sys_clone_xnu(fn, arg, wt, 0, PTHREAD_START_CUSTOM_XNU); @@ -264,7 +267,8 @@ static errno_t CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags, // 1. __asan_handle_no_return wipes stack [todo?] relegated static wontreturn void OpenbsdThreadMain(void *p) { struct CloneArgs *wt = p; - *wt->ctid = wt->tid; + atomic_init(wt->ptid, wt->tid); + atomic_init(wt->ctid, wt->tid); wt->func(wt->arg, wt->tid); asm volatile("mov\t%2,%%rsp\n\t" // so syscall can validate stack exists "movl\t$0,(%%rdi)\n\t" // *wt->ztid = 0 (old stack now free'd) @@ -295,6 +299,7 @@ relegated errno_t CloneOpenbsd(int (*func)(void *, int), char *stk, wt = (struct CloneArgs *)sp; sp = AlignStack(sp, stk, stksz, 16); wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; + wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->arg = arg; wt->func = func; @@ -303,7 +308,7 @@ relegated errno_t CloneOpenbsd(int (*func)(void *, int), char *stk, tf->tf_tid = &wt->tid; if ((rc = __tfork_thread(tf, sizeof(*tf), OpenbsdThreadMain, wt)) >= 0) { if (flags & CLONE_PARENT_SETTID) - *ptid = rc; + atomic_init(ptid, rc); return 0; } else { return -rc; @@ -316,13 +321,16 @@ relegated errno_t CloneOpenbsd(int (*func)(void *, int), char *stk, static wontreturn void NetbsdThreadMain(void *arg, // rdi int (*func)(void *, int), // rsi int flags, // rdx - atomic_int *ctid) { // rcx + atomic_int *ctid, // rcx + atomic_int *ptid) { // r8 int ax, dx; static atomic_int clobber; atomic_int *ztid = &clobber; ax = sys_gettid(); if (flags & CLONE_CHILD_SETTID) - atomic_store_explicit(ctid, ax, memory_order_release); + atomic_init(ctid, ax); + if (flags & CLONE_PARENT_SETTID) + atomic_init(ptid, ax); if (flags & CLONE_CHILD_CLEARTID) ztid = ctid; func(arg, ax); @@ -381,6 +389,7 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz, ctx->uc_mcontext.rsi = (intptr_t)func; ctx->uc_mcontext.rdx = flags; ctx->uc_mcontext.rcx = (intptr_t)ctid; + ctx->uc_mcontext.r8 = (intptr_t)ptid; ctx->uc_flags |= _UC_STACK; ctx->uc_stack.ss_sp = stk; ctx->uc_stack.ss_size = stksz; @@ -399,7 +408,7 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz, if (!failed) { unassert(tid); if (flags & CLONE_PARENT_SETTID) - *ptid = tid; + atomic_init(ptid, tid); return 0; } else { return ax; @@ -418,7 +427,8 @@ static wontreturn void FreebsdThreadMain(void *p) { #elif defined(__x86_64__) sys_set_tls(AMD64_SET_GSBASE, wt->tls); #endif - *wt->ctid = wt->tid; + atomic_init(wt->ctid, wt->tid); + atomic_init(wt->ptid, wt->tid); wt->func(wt->arg, wt->tid); // we no longer use the stack after this point // void thr_exit(%rdi = long *state); @@ -465,6 +475,7 @@ static errno_t CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz, wt = (struct CloneArgs *)sp; sp = AlignStack(sp, stk, stksz, 16); wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; + wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->tls = tls; wt->func = func; @@ -499,7 +510,7 @@ static errno_t CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz, #error "unsupported architecture" #endif if (flags & CLONE_PARENT_SETTID) - *ptid = tid; + atomic_init(ptid, tid); return 0; } @@ -511,9 +522,10 @@ static errno_t CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz, static void *SiliconThreadMain(void *arg) { struct CloneArgs *wt = arg; asm volatile("mov\tx28,%0" : /* no outputs */ : "r"(wt->tls)); - *wt->ctid = wt->this; + atomic_init(wt->ctid, wt->this); + atomic_init(wt->ptid, wt->this); __stack_call(wt->arg, wt->this, 0, 0, wt->func, wt->sp); - *wt->ztid = 0; + atomic_store_explicit(wt->ztid, 0, memory_order_release); ulock_wake(UL_COMPARE_AND_WAIT | ULF_WAKE_ALL, wt->ztid, 0); return 0; } @@ -537,6 +549,7 @@ static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz, tid = atomic_fetch_add_explicit(&tids, 1, memory_order_acq_rel); wt->this = tid = (tid % kMaxThreadIds) + kMinThreadId; wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; + wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->tls = flags & CLONE_SETTLS ? tls : 0; wt->func = fn; @@ -552,7 +565,7 @@ static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz, unassert(!__syslib->__pthread_attr_setstacksize(attr, babystack)); if (!(res = __syslib->__pthread_create(&th, attr, SiliconThreadMain, wt))) { if (flags & CLONE_PARENT_SETTID) - *ptid = tid; + atomic_init(ptid, tid); if (flags & CLONE_SETTLS) { struct CosmoTib *tib = tls; atomic_store_explicit(&tib[-1].tib_syshand, th, memory_order_release); @@ -637,7 +650,7 @@ static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz, * If you use clone() you're on your own. Example: * * int worker(void *arg) { return 0; } - * struct CosmoTib tib = {.tib_self = &tib, .tib_tid = -1}; + * struct CosmoTib tib = {.tib_self = &tib, .tib_ctid = -1}; * atomic_int tid; * char *stk = NewCosmoStack(); * clone(worker, stk, GetStackSize() - 16, @@ -647,9 +660,9 @@ static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz, * arg, &tid, &tib, &tib.tib_tid); * while (atomic_load(&tid) == 0) sched_yield(); * // thread is known - * while (atomic_load(&tib.tib_tid) < 0) sched_yield(); + * while (atomic_load(&tib.tib_ctid) < 0) sched_yield(); * // thread is running - * while (atomic_load(&tib.tib_tid) > 0) sched_yield(); + * while (atomic_load(&tib.tib_ctid) > 0) sched_yield(); * // thread has terminated * FreeCosmoStack(stk); * diff --git a/libc/runtime/cosmo2.c b/libc/runtime/cosmo2.c index d2a80c66e..a218af579 100644 --- a/libc/runtime/cosmo2.c +++ b/libc/runtime/cosmo2.c @@ -93,7 +93,8 @@ wontreturn textstartup void cosmo(long *sp, struct Syslib *m1, char *exename, .tib_sigmask = -1, .tib_sigstack_size = 57344, .tib_sigstack_addr = (char *)__builtin_frame_address(0) - 57344, - .tib_tid = 1, + .tib_ptid = 1, + .tib_ctid = 1, }; __set_tls(&tib); diff --git a/libc/runtime/cxa_thread_atexit.c b/libc/runtime/cxa_thread_atexit.c index 76b89ec89..57ce06849 100644 --- a/libc/runtime/cxa_thread_atexit.c +++ b/libc/runtime/cxa_thread_atexit.c @@ -23,7 +23,6 @@ #include "libc/nexgen32e/gc.internal.h" #include "libc/thread/posixthread.internal.h" #include "libc/thread/tls.h" -#include "third_party/nsync/wait_s.internal.h" struct Dtor { void *fun; @@ -89,10 +88,7 @@ void __cxa_thread_finalize(void) { // thread has any thread-specific data, appropriate destructor // functions shall be called in an unspecified order." // ──Quoth POSIX.1-2017 - if (tib->tib_nsync) - _weaken(nsync_waiter_destroy)(tib->tib_nsync); _pthread_unkey(tib); - _pthread_ungarbage(tib); while ((dtor = tib->tib_atexit)) { diff --git a/libc/runtime/enable_tls.c b/libc/runtime/enable_tls.c index 5847a18f9..0296e6fda 100644 --- a/libc/runtime/enable_tls.c +++ b/libc/runtime/enable_tls.c @@ -233,7 +233,8 @@ textstartup void __enable_tls(void) { } else { tid = sys_gettid(); } - atomic_init(&tib->tib_tid, tid); + atomic_init(&tib->tib_ptid, tid); + atomic_init(&tib->tib_ctid, tid); // TODO(jart): set_tid_address? // inherit signal mask @@ -248,7 +249,6 @@ textstartup void __enable_tls(void) { _pthread_static.pt_attr.__stacksize = __maps.stack.size; dll_init(&_pthread_static.list); _pthread_list = &_pthread_static.list; - atomic_init(&_pthread_static.ptid, tid); // ask the operating system to change the x86 segment register if (IsWindows()) diff --git a/libc/testlib/testmain.c b/libc/testlib/testmain.c index aaa74a6ed..0abda83e1 100644 --- a/libc/testlib/testmain.c +++ b/libc/testlib/testmain.c @@ -169,7 +169,7 @@ int main(int argc, char *argv[]) { // make sure threads are in a good state if (_weaken(_pthread_decimate)) - _weaken(_pthread_decimate)(); + _weaken(_pthread_decimate)(kPosixThreadZombie); if (_weaken(pthread_orphan_np) && !_weaken(pthread_orphan_np)()) { tinyprint(2, "error: tests ended with threads still active\n", NULL); _Exit(1); diff --git a/libc/thread/mktls.c b/libc/thread/mktls.c index b48ea3137..20d574b93 100644 --- a/libc/thread/mktls.c +++ b/libc/thread/mktls.c @@ -40,10 +40,9 @@ static char *_mktls_finish(struct CosmoTib **out_tib, char *mem, tib->tib_ftrace = old->tib_ftrace; tib->tib_strace = old->tib_strace; tib->tib_sigmask = old->tib_sigmask; - atomic_store_explicit(&tib->tib_tid, -1, memory_order_relaxed); - if (out_tib) { + atomic_init(&tib->tib_ctid, -1); + if (out_tib) *out_tib = tib; - } return mem; } diff --git a/libc/thread/posixthread.internal.h b/libc/thread/posixthread.internal.h index 8468f43c2..6a4cfa514 100644 --- a/libc/thread/posixthread.internal.h +++ b/libc/thread/posixthread.internal.h @@ -75,7 +75,6 @@ struct PosixThread { atomic_int pt_canceled; // 0x04: thread has bad beliefs _Atomic(enum PosixThreadStatus) pt_status; _Atomic(atomic_int *) pt_blocker; - atomic_int ptid; // transitions 0 → tid atomic_int pt_refs; // prevents decimation void *(*pt_start)(void *); // creation callback void *pt_val; // start param / return val @@ -108,7 +107,7 @@ int _pthread_setschedparam_freebsd(int, int, const struct sched_param *); int _pthread_tid(struct PosixThread *) libcesque; intptr_t _pthread_syshand(struct PosixThread *) libcesque; long _pthread_cancel_ack(void) libcesque; -void _pthread_decimate(void) libcesque; +void _pthread_decimate(enum PosixThreadStatus) libcesque; void _pthread_free(struct PosixThread *) libcesque paramsnonnull(); void _pthread_lock(void) libcesque; void _pthread_onfork_child(void) libcesque; diff --git a/libc/thread/pthread_create.c b/libc/thread/pthread_create.c index 1207d03b6..8a5c52c02 100644 --- a/libc/thread/pthread_create.c +++ b/libc/thread/pthread_create.c @@ -57,6 +57,7 @@ #include "libc/thread/posixthread.internal.h" #include "libc/thread/thread.h" #include "libc/thread/tls.h" +#include "third_party/nsync/wait_s.internal.h" __static_yoink("nsync_mu_lock"); __static_yoink("nsync_mu_unlock"); @@ -81,6 +82,10 @@ void _pthread_free(struct PosixThread *pt) { cosmo_stack_free(pt->pt_attr.__stackaddr, pt->pt_attr.__stacksize, pt->pt_attr.__guardsize); + // reclaim thread's cached nsync waiter object + if (pt->tib->tib_nsync) + nsync_waiter_destroy_(pt->tib->tib_nsync); + // free any additional upstream system resources // our fork implementation wipes this handle in child automatically uint64_t syshand = @@ -102,7 +107,7 @@ void _pthread_free(struct PosixThread *pt) { 3); } -void _pthread_decimate(void) { +void _pthread_decimate(enum PosixThreadStatus threshold) { struct PosixThread *pt; struct Dll *e, *e2, *list = 0; enum PosixThreadStatus status; @@ -117,11 +122,18 @@ void _pthread_decimate(void) { pt = POSIXTHREAD_CONTAINER(e); if (atomic_load_explicit(&pt->pt_refs, memory_order_acquire) > 0) continue; // pthread_kill() has a lease on this thread + if (atomic_load_explicit(&pt->tib->tib_ctid, memory_order_acquire)) + continue; // thread is still using stack so leave alone status = atomic_load_explicit(&pt->pt_status, memory_order_acquire); - if (status != kPosixThreadZombie) - break; // zombies only exist at the end of the linked list - if (atomic_load_explicit(&pt->tib->tib_tid, memory_order_acquire)) - continue; // undead thread that should stop existing soon + if (status < threshold) { + if (threshold == kPosixThreadZombie) + break; // zombies only exist at the end of the linked list + continue; + } + if (status == kPosixThreadTerminated) + if (!(pt->pt_flags & PT_STATIC)) + STRACE("warning: you forgot to join or detach thread id %d", + atomic_load_explicit(&pt->tib->tib_ptid, memory_order_acquire)); dll_remove(&_pthread_list, e); dll_make_first(&list, e); } @@ -139,7 +151,7 @@ void _pthread_decimate(void) { } } -static int PosixThread(void *arg, int tid) { +dontinstrument static int PosixThread(void *arg, int tid) { struct PosixThread *pt = arg; // setup scheduling @@ -285,12 +297,12 @@ static errno_t pthread_create_impl(pthread_t *thread, _pthread_ref(pt); // launch PosixThread(pt) in new thread - if ((rc = clone(PosixThread, pt->pt_attr.__stackaddr, pt->pt_attr.__stacksize, - CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_SYSVSEM | CLONE_SETTLS | - CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | - CLONE_CHILD_CLEARTID, - pt, &pt->ptid, __adj_tls(pt->tib), &pt->tib->tib_tid))) { + if ((rc = clone( + PosixThread, pt->pt_attr.__stackaddr, pt->pt_attr.__stacksize, + CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | + CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID | + CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID, + pt, &pt->tib->tib_ptid, __adj_tls(pt->tib), &pt->tib->tib_ctid))) { _pthread_lock(); dll_remove(&_pthread_list, &pt->list); _pthread_unlock(); @@ -363,7 +375,7 @@ static const char *DescribeHandle(char buf[12], errno_t err, pthread_t *th) { errno_t pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) { errno_t err; - _pthread_decimate(); + _pthread_decimate(kPosixThreadZombie); BLOCK_SIGNALS; err = pthread_create_impl(thread, attr, start_routine, arg, _SigMask); ALLOW_SIGNALS; diff --git a/libc/thread/pthread_decimate_np.c b/libc/thread/pthread_decimate_np.c index 93d8e5d7f..8299db3a2 100644 --- a/libc/thread/pthread_decimate_np.c +++ b/libc/thread/pthread_decimate_np.c @@ -41,7 +41,7 @@ * @return 0 on success, or errno on error */ int pthread_decimate_np(void) { - _pthread_decimate(); + _pthread_decimate(kPosixThreadZombie); cosmo_stack_clear(); return 0; } diff --git a/libc/thread/pthread_exit.c b/libc/thread/pthread_exit.c index 6c8d605bc..6f8199203 100644 --- a/libc/thread/pthread_exit.c +++ b/libc/thread/pthread_exit.c @@ -18,11 +18,13 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" #include "libc/atomic.h" +#include "libc/calls/calls.h" #include "libc/cosmo.h" #include "libc/cxxabi.h" #include "libc/dce.h" #include "libc/intrin/atomic.h" #include "libc/intrin/cxaatexit.h" +#include "libc/intrin/describebacktrace.h" #include "libc/intrin/strace.h" #include "libc/intrin/weaken.h" #include "libc/limits.h" @@ -97,13 +99,15 @@ wontreturn void pthread_exit(void *rc) { // notice how we avoid acquiring the pthread gil if (!(population = atomic_fetch_sub(&_pthread_count, 1) - 1)) { // we know for certain we're an orphan. any other threads that - // exist, will terminate and clear their tid very soon. but... - // some goofball could spawn more threads from atexit handlers + // exist, will terminate and clear their tid very soon. but some + // goofball could spawn more threads from atexit() handlers. we'd + // also like to avoid looping forever here, by auto-joining threads + // that leaked, because the user forgot to join them or detach them for (;;) { - _pthread_decimate(); + if (_weaken(__cxa_finalize)) + _weaken(__cxa_finalize)(NULL); + _pthread_decimate(kPosixThreadTerminated); if (pthread_orphan_np()) { - if (_weaken(__cxa_finalize)) - _weaken(__cxa_finalize)(NULL); population = atomic_load(&_pthread_count); break; } @@ -147,8 +151,8 @@ wontreturn void pthread_exit(void *rc) { // check if the main thread has died whilst children live // note that the main thread is joinable by child threads if (pt->pt_flags & PT_STATIC) { - atomic_store_explicit(&tib->tib_tid, 0, memory_order_release); - cosmo_futex_wake((atomic_int *)&tib->tib_tid, INT_MAX, + atomic_store_explicit(&tib->tib_ctid, 0, memory_order_release); + cosmo_futex_wake((atomic_int *)&tib->tib_ctid, INT_MAX, !IsWindows() && !IsXnu()); _Exit1(0); } diff --git a/libc/thread/pthread_timedjoin_np.c b/libc/thread/pthread_timedjoin_np.c index 8cfe73282..cd1643b8d 100644 --- a/libc/thread/pthread_timedjoin_np.c +++ b/libc/thread/pthread_timedjoin_np.c @@ -67,7 +67,7 @@ static errno_t _pthread_wait(atomic_int *ctid, struct timespec *abstime) { // thread argument to pthread_join() refers to the calling thread, // it is recommended that the function should fail and report an // [EDEADLK] error." ──Quoth POSIX.1-2017 - if (ctid == &__get_tls()->tib_tid) + if (ctid == &__get_tls()->tib_ctid) return EDEADLK; // "If the thread calling pthread_join() is canceled, then the target @@ -134,7 +134,7 @@ errno_t pthread_timedjoin_np(pthread_t thread, void **value_ptr, // "The results of multiple simultaneous calls to pthread_join() // specifying the same target thread are undefined." // ──Quoth POSIX.1-2017 - if (!(err = _pthread_wait(&pt->tib->tib_tid, abstime))) { + if (!(err = _pthread_wait(&pt->tib->tib_ctid, abstime))) { if (value_ptr) *value_ptr = pt->pt_val; if (atomic_load_explicit(&pt->pt_refs, memory_order_acquire)) { diff --git a/libc/thread/tls.h b/libc/thread/tls.h index daf661835..123beac72 100644 --- a/libc/thread/tls.h +++ b/libc/thread/tls.h @@ -23,10 +23,10 @@ struct CosmoTib { struct CosmoTib *tib_self; /* 0x00 */ struct CosmoFtrace tib_ftracer; /* 0x08 */ void *tib_garbages; /* 0x18 */ - intptr_t __unused; /* 0x20 */ + _Atomic(int32_t) tib_ptid; /* 0x20 transitions 0 → tid */ intptr_t tib_pthread; /* 0x28 */ struct CosmoTib *tib_self2; /* 0x30 */ - _Atomic(int32_t) tib_tid; /* 0x38 transitions -1 → tid → 0 */ + _Atomic(int32_t) tib_ctid; /* 0x38 transitions -1 → tid → 0 */ int32_t tib_errno; /* 0x3c */ uint64_t tib_flags; /* 0x40 */ int tib_ftrace; /* inherited */ diff --git a/test/libc/intrin/lock_test.c b/test/libc/intrin/lock_test.c index f52eb07a5..b73a94f85 100644 --- a/test/libc/intrin/lock_test.c +++ b/test/libc/intrin/lock_test.c @@ -118,10 +118,15 @@ void TestContendedLock(const char *name, int kind) { char *stk; double ns; errno_t rc; + int x, i, n = 10000; struct timespec t1, t2; pthread_mutexattr_t attr; - int tid, x, i, n = 10000; - struct CosmoTib tib = {.tib_self = &tib, .tib_self2 = &tib, .tib_tid = -1}; + struct CosmoTib tib = { + .tib_self = &tib, + .tib_self2 = &tib, + .tib_ctid = -1, + .tib_ptid = 0, + }; pthread_mutexattr_init(&attr); pthread_mutexattr_settype(&attr, kind); pthread_mutex_init(&mu, &attr); @@ -133,7 +138,7 @@ void TestContendedLock(const char *name, int kind) { CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_SYSVSEM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS, - 0, &tid, &tib, &tib.tib_tid); + 0, &tib.tib_ptid, &tib, &tib.tib_ctid); if (rc) { kprintf("clone failed: %s\n", strerror(rc)); _Exit(1); @@ -149,7 +154,7 @@ void TestContendedLock(const char *name, int kind) { ASSERT_EQ(0, pthread_mutex_unlock(&mu)); } t2 = timespec_real(); - while (tib.tib_tid) + while (tib.tib_ctid) donothing; ASSERT_EQ(1, atomic_load(&success)); ASSERT_EQ(0, atomic_load(&counter)); diff --git a/test/libc/thread/pthread_create_test.c b/test/libc/thread/pthread_create_test.c index c4daf45ff..92b6c28db 100644 --- a/test/libc/thread/pthread_create_test.c +++ b/test/libc/thread/pthread_create_test.c @@ -70,7 +70,6 @@ void OnUsr1(int sig, siginfo_t *si, void *vctx) { void SetUpOnce(void) { cosmo_stack_setmaxstacks((_rand64() & 7) - 1); - cosmo_stack_setmaxstacks(100); } void SetUp(void) { diff --git a/test/libc/thread/pthread_kill_test.c b/test/libc/thread/pthread_kill_test.c index f6494b137..2ac31f4be 100644 --- a/test/libc/thread/pthread_kill_test.c +++ b/test/libc/thread/pthread_kill_test.c @@ -259,7 +259,6 @@ void *CpuWorker(void *arg) { } TEST(pthread_kill, canAsynchronouslyRunHandlerInsideTargetThread) { - ASSERT_NE(0, __get_tls()->tib_tid); pthread_t t; struct sigaction oldsa; struct sigaction sa = {.sa_handler = OnSigAsync}; @@ -273,7 +272,6 @@ TEST(pthread_kill, canAsynchronouslyRunHandlerInsideTargetThread) { ASSERT_TRUE(exited_original_loop); ASSERT_SYS(0, 0, sigaction(SIGUSR1, &oldsa, 0)); ASSERT_EQ(0, gotsig); - ASSERT_NE(0, __get_tls()->tib_tid); } volatile int is_having_fun; @@ -287,7 +285,6 @@ void *FunWorker(void *arg) { } TEST(pthread_kill, defaultThreadSignalHandlerWillKillWholeProcess) { - ASSERT_NE(0, __get_tls()->tib_tid); SPAWN(fork); pthread_t t; ASSERT_EQ(0, pthread_create(&t, 0, FunWorker, 0)); @@ -297,7 +294,6 @@ TEST(pthread_kill, defaultThreadSignalHandlerWillKillWholeProcess) { for (;;) sched_yield(); TERMS(SIGKILL); - ASSERT_NE(0, __get_tls()->tib_tid); } void *SuspendWorker(void *arg) { @@ -308,7 +304,6 @@ void *SuspendWorker(void *arg) { } TEST(pthread_kill, canInterruptSigsuspend) { - ASSERT_NE(0, __get_tls()->tib_tid); int tid; void *res; pthread_t t; diff --git a/third_party/nsync/common.c b/third_party/nsync/common.c index 80f695a47..352168049 100644 --- a/third_party/nsync/common.c +++ b/third_party/nsync/common.c @@ -17,21 +17,18 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/atomic.h" #include "libc/calls/calls.h" -#include "libc/calls/syscall-sysv.internal.h" +#include "libc/calls/calls.h" #include "libc/dce.h" -#include "libc/intrin/directmap.h" +#include "libc/fmt/itoa.h" #include "libc/intrin/dll.h" -#include "libc/intrin/extend.h" -#include "libc/nt/enum/filemapflags.h" -#include "libc/nt/enum/pageflags.h" -#include "libc/nt/memory.h" -#include "libc/nt/runtime.h" -#include "libc/runtime/memtrack.internal.h" +#include "libc/intrin/kprintf.h" +#include "libc/intrin/weaken.h" #include "libc/runtime/runtime.h" #include "libc/stdalign.h" -#include "libc/stdalign.h" +#include "libc/str/str.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/prot.h" +#include "libc/thread/posixthread.internal.h" #include "libc/thread/thread.h" #include "libc/thread/tls.h" #include "third_party/nsync/atomic.h" @@ -39,8 +36,7 @@ #include "third_party/nsync/common.internal.h" #include "third_party/nsync/mu_semaphore.h" #include "third_party/nsync/mu_semaphore.internal.h" -#include "libc/intrin/kprintf.h" -#include "libc/intrin/strace.h" +#include "libc/intrin/cxaatexit.h" #include "third_party/nsync/wait_s.internal.h" __static_yoink("nsync_notice"); @@ -139,6 +135,9 @@ waiter *nsync_dll_waiter_samecond_ (struct Dll *e) { /* -------------------------------- */ +// TODO(jart): enforce in dbg mode once off-by-one flake is fixed +#define DETECT_WAITER_LEAKS 0 + #define MASQUE 0x00fffffffffffff8 #define PTR(x) ((uintptr_t)(x) & MASQUE) #define TAG(x) ROL((uintptr_t)(x) & ~MASQUE, 8) @@ -147,6 +146,54 @@ waiter *nsync_dll_waiter_samecond_ (struct Dll *e) { #define ROR(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) static atomic_uintptr_t free_waiters; +static _Atomic(waiter *) all_waiters; + +#if DETECT_WAITER_LEAKS +static atomic_int all_waiters_count; +static atomic_int free_waiters_count; +#endif + +static waiter *get_waiter_for_thread (void) { + return __get_tls()->tib_nsync; +} + +static bool set_waiter_for_thread (waiter *w) { + __get_tls()->tib_nsync = w; + return (true); +} + +#if DETECT_WAITER_LEAKS +__attribute__((__destructor__)) static void reconcile_waiters (void) { + // we can't perform this check if using exit() with threads + if (!pthread_orphan_np ()) + return; + waiter *w; + if ((w = get_waiter_for_thread ())) { + nsync_waiter_destroy_ (w); + set_waiter_for_thread (0); + } + if (all_waiters_count != free_waiters_count) { + char ibuf[2][12]; + FormatInt32 (ibuf[0], all_waiters_count); + FormatInt32 (ibuf[1], free_waiters_count); + tinyprint (2, "error: nsync panic: all_waiter_count (", + ibuf[0], ") != free_waiters_count (", ibuf[1], + ")\n", NULL); + _Exit (156); + } +} +#endif + +static void all_waiters_push (waiter *w) { + w->next_all = atomic_load_explicit (&all_waiters, memory_order_relaxed); + while (!atomic_compare_exchange_weak_explicit (&all_waiters, &w->next_all, w, + memory_order_acq_rel, + memory_order_relaxed)) + pthread_pause_np (); +#if DETECT_WAITER_LEAKS + ++all_waiters_count; +#endif +} static void free_waiters_push (waiter *w) { uintptr_t tip; @@ -154,14 +201,16 @@ static void free_waiters_push (waiter *w) { tip = atomic_load_explicit (&free_waiters, memory_order_relaxed); for (;;) { w->next_free = (waiter *) PTR (tip); - if (atomic_compare_exchange_weak_explicit (&free_waiters, - &tip, + if (atomic_compare_exchange_weak_explicit (&free_waiters, &tip, ABA (w, TAG (tip) + 1), memory_order_release, memory_order_relaxed)) break; pthread_pause_np (); } +#if DETECT_WAITER_LEAKS + ++free_waiters_count; +#endif } static waiter *free_waiters_pop (void) { @@ -169,15 +218,18 @@ static waiter *free_waiters_pop (void) { uintptr_t tip; tip = atomic_load_explicit (&free_waiters, memory_order_relaxed); while ((w = (waiter *) PTR (tip))) { - if (atomic_compare_exchange_weak_explicit (&free_waiters, - &tip, + if (atomic_compare_exchange_weak_explicit (&free_waiters, &tip, ABA (w->next_free, TAG (tip) + 1), memory_order_acquire, memory_order_relaxed)) break; pthread_pause_np (); } - return w; +#if DETECT_WAITER_LEAKS + if (w) + --free_waiters_count; +#endif + return (w); } static bool free_waiters_populate (void) { @@ -193,7 +245,7 @@ static bool free_waiters_populate (void) { MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (waiters == MAP_FAILED) - return false; + return (false); for (size_t i = 0; i < n; ++i) { waiter *w = &waiters[i]; w->tag = WAITER_TAG; @@ -202,7 +254,7 @@ static bool free_waiters_populate (void) { if (!i) { // netbsd can run out of semaphores munmap (waiters, n * sizeof (waiter)); - return false; + return (false); } break; } @@ -211,47 +263,31 @@ static bool free_waiters_populate (void) { w->nw.flags = NSYNC_WAITER_FLAG_MUCV; dll_init (&w->same_condition); free_waiters_push (w); + all_waiters_push (w); } - return true; + return (true); } /* -------------------------------- */ -#define waiter_for_thread __get_tls()->tib_nsync - -void nsync_waiter_destroy (void *v) { - waiter *w = (waiter *) v; - /* Reset waiter_for_thread in case another thread-local variable reuses - the waiter in its destructor while the waiter is taken by the other - thread from free_waiters. This can happen as the destruction order - of thread-local variables can be arbitrary in some platform e.g. - POSIX. */ - waiter_for_thread = NULL; - ASSERT ((w->flags & (WAITER_RESERVED|WAITER_IN_USE)) == WAITER_RESERVED); - w->flags &= ~WAITER_RESERVED; - free_waiters_push (w); -} - /* Return a pointer to an unused waiter struct. Ensures that the enclosed timer is stopped and its channel drained. */ waiter *nsync_waiter_new_ (void) { waiter *w; waiter *tw; - unsigned attempts = 0; bool out_of_semaphores = false; - tw = waiter_for_thread; - w = tw; + w = tw = get_waiter_for_thread (); if (w == NULL || (w->flags & (WAITER_RESERVED|WAITER_IN_USE)) != WAITER_RESERVED) { while (!(w = free_waiters_pop ())) { if (!out_of_semaphores) if (!free_waiters_populate ()) out_of_semaphores = true; if (out_of_semaphores) - attempts = pthread_delay_np (&free_waiters, attempts); + pthread_yield_np (); } if (tw == NULL) { - w->flags |= WAITER_RESERVED; - waiter_for_thread = w; + if (set_waiter_for_thread (w)) + w->flags |= WAITER_RESERVED; } } w->flags |= WAITER_IN_USE; @@ -261,14 +297,67 @@ waiter *nsync_waiter_new_ (void) { /* Return an unused waiter struct *w to the free pool. */ void nsync_waiter_free_ (waiter *w) { ASSERT ((w->flags & WAITER_IN_USE) != 0); + w->wipe_mu = NULL; + w->wipe_cv = NULL; w->flags &= ~WAITER_IN_USE; if ((w->flags & WAITER_RESERVED) == 0) { + if (w == get_waiter_for_thread ()) + set_waiter_for_thread (0); free_waiters_push (w); - if (w == waiter_for_thread) - waiter_for_thread = 0; } } +/* Destroys waiter associated with dead thread. */ +void nsync_waiter_destroy_ (void *v) { + waiter *w = (waiter *) v; + ASSERT ((w->flags & (WAITER_RESERVED|WAITER_IN_USE)) == WAITER_RESERVED); + w->flags &= ~WAITER_RESERVED; + free_waiters_push (w); +} + +/* Ravages nsync waiters/locks/conds after fork(). */ +void nsync_waiter_wipe_ (void) { + int n = 0; + waiter *w; + waiter *next; + waiter *prev = 0; + waiter *wall = atomic_load_explicit (&all_waiters, memory_order_relaxed); + for (w = wall; w; w = w->next_all) + nsync_mu_semaphore_destroy (&w->sem); + for (w = wall; w; w = next) { + next = w->next_all; + w->tag = 0; + w->flags = 0; + w->nw.tag = 0; + w->nw.flags = NSYNC_WAITER_FLAG_MUCV; + atomic_init(&w->nw.waiting, 0); + w->l_type = 0; + bzero (&w->cond, sizeof (w->cond)); + dll_init (&w->same_condition); + if (w->wipe_mu) + bzero (w->wipe_mu, sizeof (*w->wipe_mu)); + if (w->wipe_cv) + bzero (w->wipe_cv, sizeof (*w->wipe_cv)); + if (!nsync_mu_semaphore_init (&w->sem)) + continue; /* leak it */ + w->next_free = prev; + w->next_all = prev; + prev = w; + ++n; + } +#if DETECT_WAITER_LEAKS + atomic_init (&all_waiters_count, n); + atomic_init (&free_waiters_count, n); +#else + (void)n; +#endif + atomic_init (&free_waiters, prev); + atomic_init (&all_waiters, prev); + for (struct Dll *e = dll_first (_pthread_list); e; + e = dll_next (_pthread_list, e)) + POSIXTHREAD_CONTAINER (e)->tib->tib_nsync = 0; +} + /* ====================================================================================== */ /* writer_type points to a lock_type that describes how to manipulate a mu for a writer. */ diff --git a/third_party/nsync/common.internal.h b/third_party/nsync/common.internal.h index 43b8b3c48..fb1f581c3 100644 --- a/third_party/nsync/common.internal.h +++ b/third_party/nsync/common.internal.h @@ -154,7 +154,7 @@ extern lock_type *nsync_reader_type_; /* ---------- */ -/* Hold a pair of condition function and its argument. */ +/* Hold a pair of condition function and its argument. */ struct wait_condition_s { int (*f)(const void *v); const void *v; @@ -191,18 +191,19 @@ struct wait_condition_s { ATM_STORE_REL (&w.waiting, 0); nsync_mu_semaphore_v (&w.sem); */ typedef struct waiter_s { - uint32_t tag; /* debug DLL_NSYNC_WAITER, DLL_WAITER, DLL_WAITER_SAMECOND */ - int flags; /* see WAITER_* bits below */ - nsync_semaphore sem; /* Thread waits on this semaphore. */ - struct nsync_waiter_s nw; /* An embedded nsync_waiter_s. */ - struct nsync_mu_s_ *cv_mu; /* pointer to nsync_mu associated with a cv wait */ - lock_type - *l_type; /* Lock type of the mu, or nil if not associated with a mu. */ - nsync_atomic_uint32_ remove_count; /* count of removals from queue */ + uint32_t tag; /* Debug DLL_NSYNC_WAITER, DLL_WAITER, DLL_WAITER_SAMECOND. */ + int flags; /* See WAITER_* bits below. */ + nsync_semaphore sem; /* Thread waits on this semaphore. */ + struct nsync_waiter_s nw; /* An embedded nsync_waiter_s. */ + struct nsync_mu_s_ *cv_mu; /* Pointer to nsync_mu associated with a cv wait. */ + lock_type *l_type; /* Lock type of the mu, or nil if not associated with a mu. */ + nsync_atomic_uint32_ remove_count; /* Monotonic count of removals from queue. */ struct wait_condition_s cond; /* A condition on which to acquire a mu. */ - struct Dll same_condition; /* Links neighbours in nw.q with same - non-nil condition. */ + struct Dll same_condition; /* Links neighbours in nw.q with same non-nil condition. */ + struct waiter_s * next_all; struct waiter_s * next_free; + struct nsync_mu_s_ *wipe_mu; + struct nsync_cv_s_ *wipe_cv; } waiter; static const uint32_t WAITER_TAG = 0x0590239f; static const uint32_t NSYNC_WAITER_TAG = 0x726d2ba9; diff --git a/third_party/nsync/mem/nsync_cv.c b/third_party/nsync/mem/nsync_cv.c index 9e798d4eb..c871c581d 100644 --- a/third_party/nsync/mem/nsync_cv.c +++ b/third_party/nsync/mem/nsync_cv.c @@ -286,6 +286,8 @@ int nsync_cv_wait_with_deadline_generic (nsync_cv *pcv, void *pmu, IGNORE_RACES_START (); c.w = nsync_waiter_new_ (); + c.w->wipe_cv = pcv; + c.w->wipe_mu = pmu; c.clock = clock; c.abs_deadline = abs_deadline; c.cancel_note = cancel_note; diff --git a/third_party/nsync/mu.c b/third_party/nsync/mu.c index 8e172e8ba..6da4d14a8 100644 --- a/third_party/nsync/mu.c +++ b/third_party/nsync/mu.c @@ -57,6 +57,7 @@ void nsync_mu_lock_slow_ (nsync_mu *mu, waiter *w, uint32_t clear, lock_type *l_ w->cond.f = NULL; /* Not using a conditional critical section. */ w->cond.v = NULL; w->cond.eq = NULL; + w->wipe_mu = mu; w->l_type = l_type; zero_to_acquire = l_type->zero_to_acquire; if (clear != 0) { @@ -202,6 +203,7 @@ void nsync_mu_rlock (nsync_mu *mu) { !atomic_compare_exchange_strong_explicit (&mu->word, &old_word, (old_word+MU_RADD_TO_ACQUIRE) & ~MU_RCLEAR_ON_ACQUIRE, memory_order_acquire, memory_order_relaxed)) { + LOCKTRACE("acquiring nsync_mu_rlock(%t)...", mu); waiter *w = nsync_waiter_new_ (); nsync_mu_lock_slow_ (mu, w, 0, nsync_reader_type_); nsync_waiter_free_ (w); diff --git a/third_party/nsync/mu_semaphore.c b/third_party/nsync/mu_semaphore.c index b3eb68255..cc6906400 100644 --- a/third_party/nsync/mu_semaphore.c +++ b/third_party/nsync/mu_semaphore.c @@ -30,6 +30,15 @@ bool nsync_mu_semaphore_init (nsync_semaphore *s) { } } +/* Destroy *s. */ +void nsync_mu_semaphore_destroy (nsync_semaphore *s) { + if (IsNetbsd ()) { + return nsync_mu_semaphore_destroy_sem (s); + } else { + return nsync_mu_semaphore_destroy_futex (s); + } +} + /* Wait until the count of *s exceeds 0, and decrement it. If POSIX cancellations are currently disabled by the thread, then this function always succeeds. When they're enabled in MASKED mode, this function may return ECANCELED. Otherwise, diff --git a/third_party/nsync/mu_semaphore.h b/third_party/nsync/mu_semaphore.h index 634d9fea4..fffb99e51 100644 --- a/third_party/nsync/mu_semaphore.h +++ b/third_party/nsync/mu_semaphore.h @@ -10,6 +10,9 @@ typedef struct nsync_semaphore_s_ { /* Initialize *s; the initial value is 0. */ bool nsync_mu_semaphore_init(nsync_semaphore *s); +/* Destroy *s. */ +void nsync_mu_semaphore_destroy(nsync_semaphore *s); + /* Wait until the count of *s exceeds 0, and decrement it. */ errno_t nsync_mu_semaphore_p(nsync_semaphore *s); diff --git a/third_party/nsync/mu_semaphore.internal.h b/third_party/nsync/mu_semaphore.internal.h index 6d8167d78..6fe15090f 100755 --- a/third_party/nsync/mu_semaphore.internal.h +++ b/third_party/nsync/mu_semaphore.internal.h @@ -5,19 +5,16 @@ COSMOPOLITAN_C_START_ bool nsync_mu_semaphore_init_futex(nsync_semaphore *); +void nsync_mu_semaphore_destroy_futex(nsync_semaphore *); errno_t nsync_mu_semaphore_p_futex(nsync_semaphore *); errno_t nsync_mu_semaphore_p_with_deadline_futex(nsync_semaphore *, int, nsync_time); void nsync_mu_semaphore_v_futex(nsync_semaphore *); bool nsync_mu_semaphore_init_sem(nsync_semaphore *); +void nsync_mu_semaphore_destroy_sem(nsync_semaphore *); errno_t nsync_mu_semaphore_p_sem(nsync_semaphore *); errno_t nsync_mu_semaphore_p_with_deadline_sem(nsync_semaphore *, int, nsync_time); void nsync_mu_semaphore_v_sem(nsync_semaphore *); -bool nsync_mu_semaphore_init_gcd(nsync_semaphore *); -errno_t nsync_mu_semaphore_p_gcd(nsync_semaphore *); -errno_t nsync_mu_semaphore_p_with_deadline_gcd(nsync_semaphore *, int, nsync_time); -void nsync_mu_semaphore_v_gcd(nsync_semaphore *); - COSMOPOLITAN_C_END_ #endif /* COSMOPOLITAN_THIRD_PARTY_NSYNC_MU_SEMAPHORE_INTERNAL_H_ */ diff --git a/third_party/nsync/mu_semaphore_futex.c b/third_party/nsync/mu_semaphore_futex.c index 7c06ccee7..cc556267d 100644 --- a/third_party/nsync/mu_semaphore_futex.c +++ b/third_party/nsync/mu_semaphore_futex.c @@ -51,6 +51,9 @@ bool nsync_mu_semaphore_init_futex (nsync_semaphore *s) { return true; } +void nsync_mu_semaphore_destroy_futex (nsync_semaphore *s) { +} + /* Wait until the count of *s exceeds 0, and decrement it. If POSIX cancellations are currently disabled by the thread, then this function always succeeds. When they're enabled in MASKED mode, this function may return ECANCELED. Otherwise, diff --git a/third_party/nsync/mu_semaphore_sem.c b/third_party/nsync/mu_semaphore_sem.c index 2f8b61d45..a42b2e8c3 100644 --- a/third_party/nsync/mu_semaphore_sem.c +++ b/third_party/nsync/mu_semaphore_sem.c @@ -43,23 +43,14 @@ struct sem { int64_t id; - struct sem *next; }; -static _Atomic(struct sem *) g_sems; - static nsync_semaphore *sem_big_enough_for_sem = (nsync_semaphore *) (uintptr_t)(1 / (sizeof (struct sem) <= sizeof (*sem_big_enough_for_sem))); -static void sems_push (struct sem *f) { - f->next = atomic_load_explicit (&g_sems, memory_order_relaxed); - while (!atomic_compare_exchange_weak_explicit (&g_sems, &f->next, f, - memory_order_acq_rel, - memory_order_relaxed)) - pthread_pause_np (); -} - -static bool nsync_mu_semaphore_sem_create (struct sem *f) { +/* Initialize *s; the initial value is 0. */ +bool nsync_mu_semaphore_init_sem (nsync_semaphore *s) { + struct sem *f = (struct sem *) s; int rc; int lol; f->id = 0; @@ -77,23 +68,10 @@ static bool nsync_mu_semaphore_sem_create (struct sem *f) { return true; } -void nsync_mu_semaphore_sem_fork_child (void) { - struct sem *f; - for (f = atomic_load_explicit (&g_sems, memory_order_relaxed); f; f = f->next) { - int rc = sys_close (f->id); - STRACE ("close(%ld) → %d", f->id, rc); - } - for (f = atomic_load_explicit (&g_sems, memory_order_relaxed); f; f = f->next) - ASSERT (nsync_mu_semaphore_sem_create (f)); -} - -/* Initialize *s; the initial value is 0. */ -bool nsync_mu_semaphore_init_sem (nsync_semaphore *s) { +/* Destroys *s. */ +void nsync_mu_semaphore_destroy_sem (nsync_semaphore *s) { struct sem *f = (struct sem *) s; - if (!nsync_mu_semaphore_sem_create (f)) - return false; - sems_push (f); - return true; + sys_close (f->id); } /* Wait until the count of *s exceeds 0, and decrement it. If POSIX cancellations diff --git a/third_party/nsync/wait_s.internal.h b/third_party/nsync/wait_s.internal.h index 3d1d1de88..9bab15fdb 100644 --- a/third_party/nsync/wait_s.internal.h +++ b/third_party/nsync/wait_s.internal.h @@ -20,7 +20,7 @@ struct nsync_waiter_s { /* set if waiter is embedded in Mu/CV's internal structures */ #define NSYNC_WAITER_FLAG_MUCV 0x1 -void nsync_waiter_destroy(void *); +void nsync_waiter_destroy_(void *); COSMOPOLITAN_C_END_ #endif /* COSMOPOLITAN_LIBC_THREAD_WAIT_INTERNAL_H_ */