Reduce code size of mandatory runtime

This change reduces o/tiny/examples/life from 44kb to 24kb in size, since
it avoids linking mmap() when unnecessary. This is important for helping
cosmo not completely lose touch with its roots.
This commit is contained in:
Justine Tunney 2024-07-04 02:50:20 -07:00
parent fdab49b30e
commit 15ea0524b3
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
17 changed files with 164 additions and 163 deletions

View file

@ -81,14 +81,11 @@ textwindows void __sig_delete(int sig) {
struct Dll *e;
atomic_fetch_and_explicit(&__sig.pending, ~(1ull << (sig - 1)),
memory_order_relaxed);
BLOCK_SIGNALS;
_pthread_lock();
for (e = dll_last(_pthread_list); e; e = dll_prev(_pthread_list, e)) {
for (e = dll_last(_pthread_list); e; e = dll_prev(_pthread_list, e))
atomic_fetch_and_explicit(&POSIXTHREAD_CONTAINER(e)->tib->tib_sigpending,
~(1ull << (sig - 1)), memory_order_relaxed);
}
_pthread_unlock();
ALLOW_SIGNALS;
}
static textwindows int __sig_getter(atomic_ulong *sigs, sigset_t masked) {
@ -559,7 +556,7 @@ void __stack_call(struct NtExceptionPointers *, int, int, struct CosmoTib *,
__msabi dontinstrument unsigned __sig_crash(struct NtExceptionPointers *ep) {
// translate win32 to unix si_signo and si_code
int code, sig = __sig_crash_sig(ep, &code);
int code, sig = __sig_crash_sig(ep->ExceptionRecord->ExceptionCode, &code);
// advance the instruction pointer to skip over debugger breakpoints
// this behavior is consistent with how unix kernels are implemented

View file

@ -1,7 +1,6 @@
#ifndef COSMOPOLITAN_LIBC_CALLS_SIGNALS_INTERNAL_H_
#define COSMOPOLITAN_LIBC_CALLS_SIGNALS_INTERNAL_H_
#include "libc/calls/struct/sigset.h"
#include "libc/nt/struct/ntexceptionpointers.h"
#include "libc/thread/posixthread.internal.h"
#define SIG_HANDLED_NO_RESTART 1
@ -18,7 +17,7 @@ extern struct Signals __sig;
bool __sig_ignored(int);
int __sig_check(void);
int __sig_crash_sig(struct NtExceptionPointers *, int *);
int __sig_crash_sig(unsigned, int *);
int __sig_get(sigset_t);
int __sig_kill(struct PosixThread *, int, int);
int __sig_mask(int, const sigset_t *, sigset_t *);

View file

@ -18,6 +18,7 @@
*/
#include "libc/calls/sig.internal.h"
#include "libc/intrin/pushpop.internal.h"
#include "libc/macros.internal.h"
#include "libc/nt/enum/signal.h"
#include "libc/nt/enum/status.h"
#include "libc/nt/struct/ntexceptionpointers.h"
@ -26,82 +27,90 @@
// so, we trade away maintainability for tininess
// see libc/sysv/consts.sh for canonical magnums
#define SIGILL_ pushpop(4)
#define SIGTRAP_ pushpop(5)
#define SIGABRT_ pushpop(6)
#define SIGFPE_ pushpop(8)
#define SIGSEGV_ pushpop(11)
#define SIGSYS_ pushpop(31)
#define SIGILL_ 4
#define SIGTRAP_ 5
#define SIGABRT_ 6
#define SIGFPE_ 8
#define SIGSEGV_ 11
#define SIGSYS_ 31
#define TRAP_BRKPT_ pushpop(1)
#define ILL_ILLOPC_ pushpop(1)
#define ILL_PRVOPC_ pushpop(5)
#define SEGV_MAPERR_ pushpop(1)
#define SEGV_ACCERR_ pushpop(2)
#define SI_USER_ pushpop(0)
#define FPE_FLTDIV_ pushpop(3)
#define FPE_FLTOVF_ pushpop(4)
#define FPE_INTOVF_ pushpop(2)
#define FPE_FLTUND_ pushpop(5)
#define FPE_FLTRES_ pushpop(6)
#define FPE_FLTINV_ pushpop(7)
#define SI_KERNEL_ 0x80
#define TRAP_BRKPT_ 1
#define ILL_ILLOPC_ 1
#define ILL_PRVOPC_ 5
#define SEGV_MAPERR_ 1
#define SEGV_ACCERR_ 2
#define SI_USER_ 0
#define FPE_FLTDIV_ 3
#define FPE_FLTOVF_ 4
#define FPE_INTOVF_ 2
#define FPE_FLTUND_ 5
#define FPE_FLTRES_ 6
#define FPE_FLTINV_ 7
#define SI_KERNEL_ 128
textwindows int __sig_crash_sig(struct NtExceptionPointers *ep, int *code) {
switch (ep->ExceptionRecord->ExceptionCode) {
case kNtSignalBreakpoint:
*code = TRAP_BRKPT_;
return SIGTRAP_;
case kNtSignalIllegalInstruction:
*code = ILL_ILLOPC_;
return SIGILL_;
case kNtSignalPrivInstruction:
*code = ILL_PRVOPC_;
return SIGILL_;
case kNtSignalInPageError:
case kNtStatusStackOverflow:
*code = SEGV_MAPERR_;
return SIGSEGV_;
case kNtSignalGuardPage:
case kNtSignalAccessViolation:
*code = SEGV_ACCERR_;
return SIGSEGV_;
case kNtSignalInvalidHandle:
case kNtSignalInvalidParameter:
case kNtSignalAssertionFailure:
*code = SI_USER_;
return SIGABRT_;
case kNtStatusIntegerOverflow:
*code = FPE_INTOVF_;
return SIGFPE_;
case kNtSignalFltDivideByZero:
*code = FPE_FLTDIV_;
return SIGFPE_;
case kNtSignalFltOverflow:
*code = FPE_FLTOVF_;
return SIGFPE_;
case kNtSignalFltUnderflow:
*code = FPE_FLTUND_;
return SIGFPE_;
case kNtSignalFltInexactResult:
*code = FPE_FLTRES_;
return SIGFPE_;
case kNtSignalFltDenormalOperand:
case kNtSignalFltInvalidOperation:
case kNtSignalFltStackCheck:
case kNtSignalIntegerDivideByZero:
case kNtSignalFloatMultipleFaults:
case kNtSignalFloatMultipleTraps:
*code = FPE_FLTINV_;
return SIGFPE_;
case kNtSignalDllNotFound:
case kNtSignalOrdinalNotFound:
case kNtSignalEntrypointNotFound:
case kNtSignalDllInitFailed:
*code = SI_KERNEL_;
return SIGSYS_;
default:
*code = ep->ExceptionRecord->ExceptionCode;
return SIGSEGV_;
// LO(x) keeps the low 8 bits of an exception code.
// HI(x) keeps the bits from bit 24 upward (x >> 24). Its divisor,
// !(x & 0x00ffff00u), is 1 only when bits 8..23 of x are clear and 0
// otherwise, so a code that cannot be rebuilt from its low and high
// bytes alone divides by zero (presumably so that such a row is
// rejected when the table is built -- TODO confirm).
// ROW(x, sic, sig) expands to a nested-brace initializer holding
// LO(x), HI(x), sic and sig, in that order; sic and sig use the same
// divide-by-!(...) guard to require that no bits above the low byte
// are set.
#define LO(x) (x & 255)
#define HI(x) ((x >> 24) / !(x & 0x00ffff00u))
#define ROW(x, sic, sig) \
{ \
{ \
{ \
LO(x), HI(x), sic / !(sic & 0xffffff00), sig / !(sig & 0xffffff00) \
} \
} \
}
// One lookup-table entry pairing an exception code with the signal
// number and signal code reported for it. The four byte-sized fields
// share storage with `word` through the anonymous union, which lets a
// whole entry be copied with a single assignment of `word`.
struct CrashSig {
union {
struct {
unsigned char lo;   // low byte of the exception code (see LO)
unsigned char hi;   // high byte of the exception code (see HI)
unsigned char sic;  // value stored through the `code` out-parameter
unsigned char sig;  // signal number returned on a match
};
unsigned word;  // the same bytes viewed as one unsigned value
};
};
// Table scanned by __sig_crash_sig(). Each ROW gives an exception code,
// the signal code to report, and the signal number to return. Rows are
// tested in order and the first match wins.
// The trailing `//` on each active row is an empty comment (presumably
// there to keep the formatter from joining rows -- TODO confirm).
// The commented-out rows at the bottom are not part of the table, so
// those codes take the fallback path in __sig_crash_sig().
// NOTE(review): presumably they are disabled because their codes do not
// fit the lo/hi byte encoding enforced by HI() -- confirm.
static const struct CrashSig kNtCrashSigs[] = {
ROW(kNtSignalBreakpoint, TRAP_BRKPT_, SIGTRAP_), //
ROW(kNtSignalIllegalInstruction, ILL_ILLOPC_, SIGILL_), //
ROW(kNtSignalPrivInstruction, ILL_PRVOPC_, SIGILL_), //
ROW(kNtSignalInPageError, SEGV_MAPERR_, SIGSEGV_), //
ROW(kNtStatusStackOverflow, SEGV_MAPERR_, SIGSEGV_), //
ROW(kNtSignalGuardPage, SEGV_ACCERR_, SIGSEGV_), //
ROW(kNtSignalAccessViolation, SEGV_ACCERR_, SIGSEGV_), //
ROW(kNtSignalInvalidHandle, SI_USER_, SIGABRT_), //
ROW(kNtSignalInvalidParameter, SI_USER_, SIGABRT_), //
ROW(kNtStatusIntegerOverflow, FPE_INTOVF_, SIGFPE_), //
ROW(kNtSignalFltDivideByZero, FPE_FLTDIV_, SIGFPE_), //
ROW(kNtSignalFltOverflow, FPE_FLTOVF_, SIGFPE_), //
ROW(kNtSignalFltUnderflow, FPE_FLTUND_, SIGFPE_), //
ROW(kNtSignalFltInexactResult, FPE_FLTRES_, SIGFPE_), //
ROW(kNtSignalFltDenormalOperand, FPE_FLTINV_, SIGFPE_), //
ROW(kNtSignalFltInvalidOperation, FPE_FLTINV_, SIGFPE_), //
ROW(kNtSignalFltStackCheck, FPE_FLTINV_, SIGFPE_), //
ROW(kNtSignalIntegerDivideByZero, FPE_FLTINV_, SIGFPE_), //
// ROW(kNtSignalAssertionFailure, SI_USER_, SIGABRT_),
// ROW(kNtSignalFloatMultipleTraps, FPE_FLTINV_, SIGFPE_),
// ROW(kNtSignalFloatMultipleFaults, FPE_FLTINV_, SIGFPE_),
// ROW(kNtSignalDllNotFound, SI_KERNEL_, SIGSYS_),
// ROW(kNtSignalOrdinalNotFound, SI_KERNEL_, SIGSYS_),
// ROW(kNtSignalEntrypointNotFound, SI_KERNEL_, SIGSYS_),
// ROW(kNtSignalDllInitFailed, SI_KERNEL_, SIGSYS_),
};
/**
 * Translates an exception code into a signal number and signal code.
 *
 * Performs a linear scan of kNtCrashSigs and returns the first entry
 * whose reconstructed exception code equals `exception`.
 *
 * @param exception is the exception code to look up
 * @param code receives the matching entry's `sic` byte, or the raw
 *     `exception` value when no entry matches
 * @return the matching entry's `sig` byte, or SIGSEGV_ if none matches
 */
textwindows dontinstrument int __sig_crash_sig(unsigned exception, int *code) {
for (int i = 0; i < ARRAYLEN(kNtCrashSigs); ++i) {
struct CrashSig cs;
// copy the whole entry in one assignment through the union's word view
cs.word = kNtCrashSigs[i].word;
// widen the bytes to unsigned before shifting, then rebuild the code
// from its low byte and its high byte (the middle 16 bits are zero)
unsigned lo = cs.lo;
unsigned hi = cs.hi;
unsigned ec = lo | hi << 24;
if (ec == exception) {
*code = cs.sic;
return cs.sig;
}
}
// unrecognized exception: report the raw code alongside SIGSEGV_
*code = exception;
return SIGSEGV_;
}

View file

@ -30,10 +30,8 @@
*/
struct AuxiliaryValue __getauxval(unsigned long at) {
unsigned long *ap;
for (ap = __auxv; ap[0]; ap += 2) {
if (at == ap[0]) {
for (ap = __auxv; ap && ap[0]; ap += 2)
if (at == ap[0])
return (struct AuxiliaryValue){ap[1], true};
}
}
return (struct AuxiliaryValue){0, false};
}

View file

@ -143,7 +143,8 @@ textstartup void __init_fds(int argc, char **argv, char **envp) {
break;
if (!TokAtoi(&fdspec, &protocol))
break;
__ensurefds_unlocked(fd);
if (_weaken(__ensurefds_unlocked))
_weaken(__ensurefds_unlocked)(fd);
struct Fd *f = fds->p + fd;
if (f->handle && f->handle != -1 && f->handle != handle) {
CloseHandle(f->handle);

View file

@ -40,9 +40,8 @@ int gettid(void) {
int tid;
if (VERY_LIKELY(__tls_enabled && !__vforked)) {
tid = atomic_load_explicit(&__get_tls()->tib_tid, memory_order_acquire);
if (VERY_LIKELY(tid > 0)) {
if (VERY_LIKELY(tid > 0))
return tid;
}
}
if (IsXnuSilicon()) {
return enosys(); // can only happen if we can't access thread local storage

View file

@ -31,35 +31,51 @@ __static_yoink("_init_maps");
struct Maps __maps;
/**
 * Links a map into the global `__maps` bookkeeping.
 *
 * Initializes the map's list element, makes it the first element of
 * `__maps.used`, pushes it onto the front of the `__maps.maps` chain
 * through `map->next`, and increments `__maps.count`.
 *
 * NOTE(review): this function takes no lock itself; presumably callers
 * hold the maps lock or run before threads exist -- confirm.
 *
 * @param map is the mapping record to link in
 */
void __maps_add(struct Map *map) {
dll_init(&map->elem);
dll_make_first(&__maps.used, &map->elem);
map->next = __maps.maps;
__maps.maps = map;
++__maps.count;
}
// Accounts for the map's pages in `__maps.pages`, then links the map in
// with __maps_add(). The page count is map->size rounded up to a
// multiple of pagesz, divided by pagesz. The `& -pagesz` rounding
// assumes pagesz is a power of two -- TODO confirm for all callers.
static void __maps_adder(struct Map *map, int pagesz) {
__maps.pages += ((map->size + pagesz - 1) & -pagesz) / pagesz;
__maps_add(map);
}
/**
 * Records the stack mapping in `__maps.stack` and registers it.
 *
 * Stores the address, size, protection and handle into `__maps.stack`,
 * then passes that record to __maps_adder() so it is counted and linked
 * into the global map list.
 *
 * @param stackaddr is stored as the mapping's `addr`
 * @param pagesz is the page size used for the page accounting
 * @param stacksize is stored as the mapping's `size`
 * @param stackprot is stored as the mapping's `prot`
 * @param stackhand is stored as the mapping's `h`
 */
void __maps_stack(void *stackaddr, int pagesz, size_t stacksize, int stackprot,
intptr_t stackhand) {
__maps.stack.addr = stackaddr;
__maps.stack.size = stacksize;
__maps.stack.prot = stackprot;
__maps.stack.h = stackhand;
__maps_adder(&__maps.stack, pagesz);
}
void __maps_init(void) {
int pagesz = getauxval(AT_PAGESZ);
// record _start() stack mapping
if (!IsWindows()) {
struct AddrSize stack;
stack = __get_main_stack();
dll_init(&__maps.stack.elem);
__maps.stack.addr = stack.addr;
__maps.stack.size = stack.size;
__maps.stack.prot = (uintptr_t)ape_stack_prot;
__maps_insert(&__maps.stack);
__maps_stack(stack.addr, pagesz, stack.size, (uintptr_t)ape_stack_prot, 0);
}
// record .text and .data mappings
static struct Map text, data;
dll_init(&text.elem);
text.addr = (char *)__executable_start;
text.size = _etext - __executable_start;
text.prot = PROT_READ | PROT_EXEC;
int pagesz = getauxval(AT_PAGESZ);
uintptr_t ds = ((uintptr_t)_etext + pagesz - 1) & -pagesz;
if (ds < (uintptr_t)_end) {
dll_init(&data.elem);
data.addr = (char *)ds;
data.size = (uintptr_t)_end - ds;
data.prot = PROT_READ | PROT_WRITE;
__maps_insert(&data);
__maps_adder(&data, pagesz);
}
__maps_insert(&text);
__maps_adder(&text, pagesz);
}
privileged void __maps_lock(void) {

View file

@ -43,11 +43,13 @@ void __maps_init(void);
void __maps_lock(void);
void __maps_check(void);
void __maps_unlock(void);
void __maps_add(struct Map *);
struct Map *__maps_alloc(void);
void __maps_free(struct Map *);
void __maps_insert(struct Map *);
int __munmap(char *, size_t, bool);
void *__mmap(char *, size_t, int, int, int, int64_t);
void __maps_stack(void *, int, size_t, int, intptr_t);
struct AddrSize __get_main_stack(void);
COSMOPOLITAN_C_END_

View file

@ -144,10 +144,7 @@ void __maps_insert(struct Map *map) {
dll_make_first(&__maps.used, &last->elem);
__maps_free(map);
} else {
dll_make_first(&__maps.used, &map->elem);
map->next = __maps.maps;
__maps.maps = map;
++__maps.count;
__maps_add(map);
}
__maps_check();
}

View file

@ -18,16 +18,12 @@
*/
#include "libc/thread/posixthread.internal.h"
pthread_spinlock_t _pthread_lock_obj;
void _pthread_init(void) {
(void)pthread_spin_init(&_pthread_lock_obj, 0);
}
pthread_mutex_t _pthread_lock_obj = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
void _pthread_lock(void) {
pthread_spin_lock(&_pthread_lock_obj);
pthread_mutex_lock(&_pthread_lock_obj);
}
void _pthread_unlock(void) {
pthread_spin_unlock(&_pthread_lock_obj);
pthread_mutex_unlock(&_pthread_lock_obj);
}

View file

@ -35,7 +35,7 @@ LIBC_PROC_A_DIRECTDEPS = \
LIBC_STR \
LIBC_SYSV \
LIBC_SYSV_CALLS \
THIRD_PARTY_NSYNC
THIRD_PARTY_NSYNC \
LIBC_PROC_A_DEPS := \
$(call uniq,$(foreach x,$(LIBC_PROC_A_DIRECTDEPS),$($(x))))

View file

@ -67,7 +67,6 @@
#ifdef __x86_64__
extern long __klog_handle;
extern atomic_uint free_waiters_mu;
void WipeKeystrokes(void);
__msabi extern typeof(GetCurrentProcessId) *const __imp_GetCurrentProcessId;

View file

@ -45,7 +45,8 @@
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/tls.h"
extern atomic_uint free_waiters_mu;
extern pthread_mutex_t nsync_waiters_mu;
extern pthread_mutex_t _pthread_lock_obj;
static void _onfork_prepare(void) {
if (_weaken(_pthread_onfork_prepare))
@ -53,12 +54,11 @@ static void _onfork_prepare(void) {
_pthread_lock();
__maps_lock();
__fds_lock();
while (atomic_exchange_explicit(&free_waiters_mu, 1, memory_order_acquire)) {
}
pthread_mutex_lock(&nsync_waiters_mu);
}
static void _onfork_parent(void) {
atomic_store_explicit(&free_waiters_mu, 0, memory_order_release);
pthread_mutex_unlock(&nsync_waiters_mu);
__fds_unlock();
__maps_unlock();
_pthread_unlock();
@ -67,13 +67,9 @@ static void _onfork_parent(void) {
}
static void _onfork_child(void) {
pthread_mutexattr_t attr;
pthread_mutexattr_init(&attr);
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
pthread_mutex_init(&__fds_lock_obj, &attr);
atomic_store_explicit(&free_waiters_mu, 0, memory_order_relaxed);
pthread_mutexattr_destroy(&attr);
_pthread_init();
__fds_lock_obj = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
nsync_waiters_mu = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
_pthread_lock_obj = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
atomic_store_explicit(&__maps.lock, 0, memory_order_relaxed);
atomic_store_explicit(&__get_tls()->tib_relock_maps, 0, memory_order_relaxed);
if (_weaken(_pthread_onfork_child))
@ -138,9 +134,8 @@ int _fork(uint32_t dwCreationFlags) {
atomic_store_explicit(&pt->pt_canceled, false, memory_order_relaxed);
// run user fork callbacks
if (__threaded) {
if (__threaded)
_onfork_child();
}
STRACE("fork() → 0 (child of %d)", parent);
} else {
// this is the parent process

View file

@ -159,7 +159,7 @@ static bool32 HasEnvironmentVariable(const char16_t *name) {
}
static abi unsigned OnWinCrash(struct NtExceptionPointers *ep) {
int code, sig = __sig_crash_sig(ep, &code);
int code, sig = __sig_crash_sig(ep->ExceptionRecord->ExceptionCode, &code);
TerminateThisProcess(sig);
}
@ -194,28 +194,23 @@ static abi wontreturn void WinInit(const char16_t *cmdline) {
__imp_AddVectoredExceptionHandler(true, (void *)OnWinCrash);
// allocate memory for stack and argument block
intptr_t stackhand;
char *stackaddr = (char *)GetStaticStackAddr(0);
size_t stacksize = GetStaticStackSize();
__imp_MapViewOfFileEx(
(__maps.stack.h = __imp_CreateFileMappingW(
-1, 0, kNtPageExecuteReadwrite, stacksize >> 32, stacksize, NULL)),
(stackhand = __imp_CreateFileMappingW(-1, 0, kNtPageExecuteReadwrite,
stacksize >> 32, stacksize, NULL)),
kNtFileMapWrite | kNtFileMapExecute, 0, 0, stacksize, stackaddr);
int prot = (intptr_t)ape_stack_prot;
if (~prot & PROT_EXEC) {
int stackprot = (intptr_t)ape_stack_prot;
if (~stackprot & PROT_EXEC) {
uint32_t old;
__imp_VirtualProtect(stackaddr, stacksize, kNtPageReadwrite, &old);
}
uint32_t oldattr;
__imp_VirtualProtect(stackaddr, GetGuardSize(),
kNtPageReadwrite | kNtPageGuard, &oldattr);
__maps.stack.addr = stackaddr;
__maps.stack.size = stacksize;
__maps.stack.prot = prot;
__maps.maps = &__maps.stack;
__maps.pages = (stacksize + 4095) / 4096;
__maps.count = 1;
dll_init(&__maps.stack.elem);
dll_make_first(&__maps.used, &__maps.stack.elem);
if (_weaken(__maps_stack))
_weaken(__maps_stack)(stackaddr, 4096, stacksize, stackprot, stackhand);
struct WinArgs *wa =
(struct WinArgs *)(stackaddr + (stacksize - sizeof(struct WinArgs)));

View file

@ -105,7 +105,6 @@ intptr_t _pthread_syshand(struct PosixThread *) libcesque;
long _pthread_cancel_ack(void) libcesque;
void _pthread_decimate(void) libcesque;
void _pthread_free(struct PosixThread *, bool) libcesque;
void _pthread_init(void) libcesque;
void _pthread_lock(void) libcesque;
void _pthread_onfork_child(void) libcesque;
void _pthread_onfork_parent(void) libcesque;

View file

@ -85,39 +85,38 @@ wontreturn void pthread_exit(void *rc) {
_pthread_decimate();
// run atexit handlers if orphaned thread
if (pthread_orphan_np()) {
if (_weaken(__cxa_finalize)) {
if (pthread_orphan_np())
if (_weaken(__cxa_finalize))
_weaken(__cxa_finalize)(NULL);
}
}
// transition the thread to a terminated state
status = atomic_load_explicit(&pt->pt_status, memory_order_acquire);
do {
switch (status) {
case kPosixThreadJoinable:
transition = kPosixThreadTerminated;
break;
case kPosixThreadDetached:
transition = kPosixThreadZombie;
break;
default:
__builtin_unreachable();
if (status == kPosixThreadZombie) {
transition = kPosixThreadZombie;
break;
} else if (status == kPosixThreadTerminated) {
transition = kPosixThreadTerminated;
break;
} else if (status == kPosixThreadJoinable) {
transition = kPosixThreadTerminated;
} else if (status == kPosixThreadDetached) {
transition = kPosixThreadZombie;
} else {
__builtin_trap();
}
} while (!atomic_compare_exchange_weak_explicit(
&pt->pt_status, &status, transition, memory_order_release,
memory_order_relaxed));
// make this thread a zombie if it was detached
if (transition == kPosixThreadZombie) {
if (transition == kPosixThreadZombie)
_pthread_zombify(pt);
}
// check if this is the last survivor
if (pthread_orphan_np()) {
for (const uintptr_t *p = __fini_array_end; p > __fini_array_start;) {
for (const uintptr_t *p = __fini_array_end; p > __fini_array_start;)
((void (*)(void))(*--p))();
}
_Exit(0);
}

View file

@ -181,7 +181,7 @@ static void *nsync_malloc (size_t size) {
static struct Dll *free_waiters = NULL;
/* free_waiters points to a doubly-linked list of free waiter structs. */
nsync_atomic_uint32_ free_waiters_mu; /* spinlock; protects free_waiters */
pthread_mutex_t nsync_waiters_mu = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
#define waiter_for_thread __get_tls()->tib_nsync
@ -196,9 +196,9 @@ void nsync_waiter_destroy (void *v) {
IGNORE_RACES_START ();
ASSERT ((w->flags & (WAITER_RESERVED|WAITER_IN_USE)) == WAITER_RESERVED);
w->flags &= ~WAITER_RESERVED;
nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0);
pthread_mutex_lock (&nsync_waiters_mu);
dll_make_first (&free_waiters, &w->nw.q);
ATM_STORE_REL (&free_waiters_mu, 0); /* release store */
pthread_mutex_unlock (&nsync_waiters_mu);
IGNORE_RACES_END ();
}
@ -212,13 +212,13 @@ waiter *nsync_waiter_new_ (void) {
w = tw;
if (w == NULL || (w->flags & (WAITER_RESERVED|WAITER_IN_USE)) != WAITER_RESERVED) {
w = NULL;
nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0);
pthread_mutex_lock (&nsync_waiters_mu);
q = dll_first (free_waiters);
if (q != NULL) { /* If free list is non-empty, dequeue an item. */
dll_remove (&free_waiters, q);
w = DLL_WAITER (q);
}
ATM_STORE_REL (&free_waiters_mu, 0); /* release store */
pthread_mutex_unlock (&nsync_waiters_mu);
if (w == NULL) { /* If free list was empty, allocate an item. */
w = (waiter *) nsync_malloc (sizeof (*w));
w->tag = WAITER_TAG;
@ -246,9 +246,9 @@ void nsync_waiter_free_ (waiter *w) {
ASSERT ((w->flags & WAITER_IN_USE) != 0);
w->flags &= ~WAITER_IN_USE;
if ((w->flags & WAITER_RESERVED) == 0) {
nsync_spin_test_and_set_ (&free_waiters_mu, 1, 1, 0);
pthread_mutex_lock (&nsync_waiters_mu);
dll_make_first (&free_waiters, &w->nw.q);
ATM_STORE_REL (&free_waiters_mu, 0); /* release store */
pthread_mutex_unlock (&nsync_waiters_mu);
if (w == waiter_for_thread)
waiter_for_thread = 0;
}