Improve pthread_join()

Since we now target Windows 8 and newer, clone() can work as advertised on
Windows, where it sends a futex wake to the child tid when the thread exits.
It's also likely that we no longer need the workaround in _wait0() for
thread flakes on OpenBSD.
This commit is contained in:
Justine Tunney 2022-09-16 14:02:06 -07:00
parent 3733b43a8f
commit 994e1f4386
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
19 changed files with 154 additions and 74 deletions

View file

@ -102,7 +102,7 @@ endif
# - Larger binaries
#
ifeq ($(MODE), asan)
CONFIG_CCFLAGS += $(BACKTRACES) -O2
CONFIG_CCFLAGS += $(BACKTRACES) -O2 -DSYSDEBUG
CONFIG_COPTS += -fsanitize=address
TARGET_ARCH ?= -msse3
endif

View file

@ -18,10 +18,10 @@
*/
#include "libc/calls/calls.h"
#include "libc/calls/internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/calls/syscall-nt.internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h"
#include "libc/intrin/strace.internal.h"
#include "libc/sysv/errfuns.h"
/**

View file

@ -19,10 +19,10 @@
#include "libc/calls/asan.internal.h"
#include "libc/calls/calls.h"
#include "libc/calls/state.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/calls/struct/timespec.internal.h"
#include "libc/dce.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/sysv/errfuns.h"
/**
@ -71,10 +71,10 @@ int nanosleep(const struct timespec *req, struct timespec *rem) {
rem->tv_nsec = 0;
}
#if defined(SYSDEBUG) && _POLLTRACE
#ifdef SYSDEBUG
if (!__time_critical) {
POLLTRACE("nanosleep(%s, [%s]) → %d% m", DescribeTimespec(rc, req),
DescribeTimespec(rc, rem), rc);
STRACE("nanosleep(%s, [%s]) → %d% m", DescribeTimespec(rc, req),
DescribeTimespec(rc, rem), rc);
}
#endif

45
libc/intrin/bt.c Normal file
View file

@ -0,0 +1,45 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/errno.h"
#include "libc/intrin/intrin.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/weaken.h"
#include "libc/log/backtrace.internal.h"
/**
 * Prints an optional message, then a backtrace if one can be produced.
 *
 * When `fmt` is non-null it is formatted together with the variadic
 * arguments using kvprintf(). Afterwards, if `ShowBacktrace` has been
 * linked into the program (it is only weakly referenced here), it is
 * called with the current frame address and errno is restored to the
 * value it had before that call. Otherwise a hint is printed telling
 * the developer how to link it.
 *
 * @param fmt is a kvprintf() format string, or null to print nothing
 */
void _bt(const char *fmt, ...) {
  if (fmt) {
    va_list args;
    va_start(args, fmt);
    kvprintf(fmt, args);
    va_end(args);
  }
  if (!_weaken(ShowBacktrace)) {
    kprintf("_bt() can't show backtrace because you need:\n"
            "\tSTATIC_YOINK(\"ShowBacktrace\");\n"
            "to be linked.\n");
    return;
  }
  // first argument is presumably the output file descriptor (stderr)
  int saved_errno = errno;
  _weaken(ShowBacktrace)(2, __builtin_frame_address(0));
  errno = saved_errno;
}

View file

@ -16,7 +16,6 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"

View file

@ -17,7 +17,6 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/errno.h"
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"

View file

@ -17,18 +17,18 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/_getauxval.internal.h"
#include "libc/thread/thread.h"
#include "libc/nexgen32e/rdtsc.h"
#include "libc/thread/tls.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/auxv.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
static struct {
int thepid;
uint128_t thepool;
pthread_mutex_t lock;
pthread_spinlock_t lock;
} g_rand64;
/**
@ -49,7 +49,7 @@ static struct {
uint64_t rand64(void) {
void *p;
uint128_t s;
if (__threaded) pthread_mutex_lock(&g_rand64.lock);
if (__threaded) pthread_spin_lock(&g_rand64.lock);
if (__pid == g_rand64.thepid) {
s = g_rand64.thepool; // normal path
} else {
@ -70,6 +70,6 @@ uint64_t rand64(void) {
g_rand64.thepid = __pid;
}
g_rand64.thepool = (s *= 15750249268501108917ull); // lemur64
if (__threaded) pthread_mutex_unlock(&g_rand64.lock);
pthread_spin_unlock(&g_rand64.lock);
return s >> 64;
}

View file

@ -26,6 +26,7 @@
#include "libc/limits.h"
#include "libc/macros.internal.h"
#include "libc/nt/runtime.h"
#include "libc/nt/synchronization.h"
#include "libc/nt/thread.h"
#include "libc/nt/thunk/msabi.h"
#include "libc/runtime/clone.internal.h"
@ -56,6 +57,7 @@
__msabi extern typeof(TlsSetValue) *const __imp_TlsSetValue;
__msabi extern typeof(ExitThread) *const __imp_ExitThread;
__msabi extern typeof(WakeByAddressAll) *const __imp_WakeByAddressAll;
struct CloneArgs {
union {
@ -106,6 +108,7 @@ WinThreadEntry(int rdi, // rcx
// we can now clear ctid directly since we're no longer using our own
// stack memory, which can now be safely free'd by the parent thread.
*wt->ztid = 0;
__imp_WakeByAddressAll(wt->ztid);
// since we didn't indirect this function through NT2SYSV() it's not
// safe to simply return, and as such, we just call ExitThread().
__imp_ExitThread(rc);

View file

@ -50,6 +50,7 @@ extern bool __isworker;
void mcount(void);
int _freestack(void *);
void _bt(const char *, ...);
unsigned long getauxval(unsigned long);
void *_mapanon(size_t) attributeallocsize((1)) mallocesque;
void *_mapshared(size_t) attributeallocsize((1)) mallocesque;

View file

@ -40,6 +40,7 @@ LIBC_RUNTIME_A_DIRECTDEPS = \
LIBC_NEXGEN32E \
LIBC_NT_ADVAPI32 \
LIBC_NT_KERNEL32 \
LIBC_NT_SYNCHRONIZATION \
LIBC_STR \
LIBC_STUBS \
LIBC_SYSV \

View file

@ -16,13 +16,8 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/atomic.h"
#include "libc/intrin/kprintf.h"
#include "libc/limits.h"
#include "libc/thread/thread.h"
#include "third_party/nsync/counter.h"
#include "third_party/nsync/futex.internal.h"
#include "third_party/nsync/time.h"
/**
* Waits for all threads to arrive at barrier.

View file

@ -16,36 +16,43 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
#include "libc/thread/thread.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/spawn.h"
#include "libc/thread/thread.h"
/**
* Asks POSIX thread to free itself automatically on termination.
*
* @return 0 on success, or errno with error
* @raise EINVAL if thread is null or already detached
*/
int pthread_detach(pthread_t thread) {
struct PosixThread *pt;
enum PosixThreadStatus status;
struct PosixThread *pt = (struct PosixThread *)thread;
if (!(pt = (struct PosixThread *)thread)) {
return EINVAL;
}
for (;;) {
status = atomic_load_explicit(&pt->status, memory_order_relaxed);
status = atomic_load_explicit(&pt->status, memory_order_acquire);
if (status == kPosixThreadDetached || status == kPosixThreadZombie) {
// these two states indicate the thread was already detached, in
// which case it's already listed under _pthread_zombies.
break;
return EINVAL;
} else if (status == kPosixThreadTerminated) {
// thread was joinable and finished running. since pthread_join
// won't be called, it's safe to free the thread resources now.
// POSIX says this could be reported as ESRCH but then our test
// code would be less elegant in order for it to avoid flaking.
_pthread_wait(pt);
_pthread_free(pt);
break;
} else if (status == kPosixThreadJoinable) {
if (atomic_compare_exchange_weak_explicit(
&pt->status, &status, kPosixThreadDetached, memory_order_acquire,
&pt->status, &status, kPosixThreadDetached, memory_order_release,
memory_order_relaxed)) {
_pthread_zombies_add(pt);
break;

View file

@ -16,13 +16,10 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/errno.h"
#include "libc/thread/thread.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/spawn.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
/**
* Waits for thread to terminate.
@ -31,12 +28,15 @@
* @raise EDEADLK if thread is detached
*/
int pthread_join(pthread_t thread, void **value_ptr) {
struct PosixThread *pt = (struct PosixThread *)thread;
if (pt->status == kPosixThreadDetached || //
pt->status == kPosixThreadZombie) {
assert(!"badjoin");
struct PosixThread *pt;
if (thread == __get_tls()->tib_pthread) {
return EDEADLK;
}
if (!(pt = (struct PosixThread *)thread) || //
pt->status == kPosixThreadZombie || //
pt->status == kPosixThreadDetached) {
return EINVAL;
}
_pthread_wait(pt);
if (value_ptr) {
*value_ptr = pt->rc;

View file

@ -177,19 +177,19 @@ extern const errno_t EBUSY;
#define pthread_spin_lock(pSpin) \
({ \
pthread_spinlock_t *_s = pSpin; \
while (__atomic_test_and_set(&_s->_lock, __ATOMIC_SEQ_CST)) donothing; \
while (__atomic_test_and_set(&_s->_lock, __ATOMIC_ACQUIRE)) donothing; \
0; \
})
#define pthread_spin_unlock(pSpin) \
({ \
pthread_spinlock_t *_s = pSpin; \
__atomic_store_n(&_s->_lock, 0, __ATOMIC_RELAXED); \
__atomic_store_n(&_s->_lock, 0, __ATOMIC_RELEASE); \
0; \
})
#define pthread_spin_trylock(pSpin) \
({ \
pthread_spinlock_t *_s = pSpin; \
__atomic_test_and_set(&_s->_lock, __ATOMIC_SEQ_CST) ? EBUSY : 0; \
__atomic_test_and_set(&_s->_lock, __ATOMIC_ACQUIRE) ? EBUSY : 0; \
})
#endif /* GCC 4.7+ */

View file

@ -28,6 +28,7 @@ LIBC_THREAD_A_DIRECTDEPS = \
LIBC_INTRIN \
LIBC_MEM \
LIBC_NT_KERNEL32 \
LIBC_NT_SYNCHRONIZATION \
LIBC_RUNTIME \
LIBC_STR \
LIBC_STUBS \

View file

@ -16,50 +16,85 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/atomic.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/struct/timespec.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/nt/runtime.h"
#include "libc/nt/synchronization.h"
#include "libc/sysv/consts/futex.h"
#include "libc/thread/thread.h"
#include "libc/thread/wait0.internal.h"
int _futex(atomic_int *, int, int, const struct timespec *);
// Sleeps for the interval in `ts` using nanosleep(), ignoring any
// remaining time. If the sleep fails, the failure is asserted to be
// EINTR and errno is put back to the value it had on entry.
// Returns whatever nanosleep() returned: zero on a completed sleep,
// nonzero otherwise.
static int _wait0_sleep(struct timespec *ts) {
  int saved_errno = errno;
  int result = nanosleep(ts, 0);
  if (!result) {
    return 0;
  }
  _npassert(errno == EINTR);
  errno = saved_errno;
  return result;
}
// Performs one step of polling with exponential backoff.
//
// `ts->tv_nsec` holds the current backoff interval and is doubled by
// this function when the step completes normally:
//
// - below 1000ns we only sched_yield(), because (per the original
//   author's note) nanosleep() will ceiling round to 1ms on the new
//   technology; the interval always doubles afterwards.
// - otherwise we sleep for the interval; it doubles only if the sleep
//   was not interrupted and the interval is still under 100ms.
static void _wait0_poll(struct timespec *ts) {
  int grow;
  if (ts->tv_nsec < 1000) {
    sched_yield();
    grow = 1;
  } else {
    grow = !_wait0_sleep(ts) && ts->tv_nsec < 100 * 1000 * 1000;
  }
  if (grow) {
    ts->tv_nsec <<= 1;
  }
}
// Blocks on the word at `a` for as long as it still holds `e`, using
// the platform's futex-style primitive, and logs the call via STRACE.
//
// - Windows: WaitOnAddress() with a timeout argument of -1.
// - elsewhere: _futex() with FUTEX_WAIT and a null timeout; a positive
//   result on OpenBSD is negated so errors are uniformly negative.
//
// The result must be one of 0, -EINTR, -ETIMEDOUT or -EWOULDBLOCK,
// otherwise _npassert() fires.
static void _wait0_futex(const atomic_int *a, int e) {
  int op = FUTEX_WAIT;  // we need a shared mutex
  int rc;
  if (!IsWindows()) {
    rc = _futex(a, op, e, 0);
    if (IsOpenbsd() && rc > 0) {
      rc = -rc;
    }
  } else if (WaitOnAddress(a, &e, sizeof(*a), -1)) {
    rc = 0;
  } else {
    // NOTE(review): GetLastError() presumably yields a Win32 error code
    // rather than an errno value, yet rc is compared against errno
    // constants below -- confirm whether a translation is needed.
    rc = -GetLastError();
  }
  STRACE("futex(%t, %s, %d, %s) → %s", a, DescribeFutexOp(op), e, "NULL",
         DescribeFutexResult(rc));
  _npassert(rc == 0 ||           //
            rc == -EINTR ||      //
            rc == -ETIMEDOUT ||  //
            rc == -EWOULDBLOCK);
}
/**
* Blocks until memory location becomes zero.
*
* This is intended to be used on the child thread id, which is updated
* by the _spawn() system call when a thread terminates. The purpose of
* this operation is to know when it's safe to munmap() a threads stack
* by the clone() system call when a thread terminates. We need this in
* order to know when it's safe to free a thread's stack. This function
* uses futexes on Linux, OpenBSD, and Windows. On other platforms this
* uses polling with exponential backoff.
*/
void _wait0(const atomic_int *ctid) {
int x, rc;
char buf[12];
for (;;) {
if (!(x = atomic_load_explicit(ctid, memory_order_relaxed))) {
break;
} else if (IsLinux() || IsOpenbsd()) {
rc = _futex(ctid, FUTEX_WAIT, x, &(struct timespec){2});
STRACE("futex(%t, FUTEX_WAIT, %d, {2, 0}) → %s", ctid, x,
(DescribeFutexResult)(buf, rc));
if (IsOpenbsd() && rc > 0) rc = -rc;
if (!(rc == 0 || //
rc == -EINTR || //
rc == -ETIMEDOUT || //
rc == -EWOULDBLOCK)) {
notpossible;
}
int x;
struct timespec ts = {0, 1};
while ((x = atomic_load_explicit(ctid, memory_order_acquire))) {
if (IsLinux() || IsOpenbsd() || IsWindows()) {
_wait0_futex(ctid, x);
} else {
pthread_yield();
_wait0_poll(&ts);
}
}
if (IsOpenbsd()) {
// TODO(jart): Why do we need it? It's not even perfect.
// What's up with all these OpenBSD flakes??
pthread_yield();
}
}

View file

@ -81,7 +81,7 @@ BENCH(lock, scalability) {
pthread_barrier_destroy(&barrier);
pthread_mutex_destroy(&lock);
t2 = _timespec_real();
printf("consumed %10g seconds monotonic time and %10g seconds cpu time\n",
_timespec_tonanos(_timespec_sub(t2, t1)) / 1000000000.,
printf("consumed %10g seconds real time and %10g seconds cpu time\n",
_timespec_tonanos(_timespec_sub(t2, t1)) / 1e9,
(double)clock() / CLOCKS_PER_SEC);
}

View file

@ -18,29 +18,18 @@
*/
#include "libc/calls/calls.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/sigset.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
#include "libc/limits.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/stack.h"
#include "libc/stdio/rand.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clone.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/sa.h"
#include "libc/sysv/consts/sig.h"
#include "libc/testlib/testlib.h"
#include "libc/thread/spawn.h"
#include "libc/thread/thread.h"
#include "libc/time/time.h"
#define THREADS 8
#define ENTRIES 1024
#define ENTRIES 100
volatile uint64_t A[THREADS * ENTRIES];
pthread_barrier_t barrier;

View file

@ -54,11 +54,16 @@ void TriggerSignal(void) {
}
// Thread start routine (presumably handed to pthread_create() by the
// tests that follow -- confirm against the callers).
//
// From inside the new thread it checks that joining oneself is refused
// with EDEADLK and that gettid() agrees with pthread_getthreadid_np(),
// then calls TriggerSignal() and returns `arg` plus one, computed as a
// pointer-sized integer, so the joiner can verify the return value.
static void *Increment(void *arg) {
ASSERT_EQ(EDEADLK, pthread_join(pthread_self(), 0));
ASSERT_EQ(gettid(), pthread_getthreadid_np());
TriggerSignal();
return (void *)((uintptr_t)arg + 1);
}
// A thread asking to join itself must get EDEADLK back rather than
// block; here the caller is the thread that runs the test itself.
TEST(pthread_create, joinSelfDeadlocks) {
ASSERT_EQ(EDEADLK, pthread_join(pthread_self(), 0));
}
TEST(pthread_create, testCreateReturnJoin) {
void *rc;
pthread_t id;