Improve pthread_join()

Since we're now on Windows 8, we can have clone() work as advertised on
Windows, where it sends a futex wake to the child tid. It's also likely
that we no longer need the workaround in _wait0() for thread flakes on
OpenBSD.
This commit is contained in:
Justine Tunney 2022-09-16 14:02:06 -07:00
parent 3733b43a8f
commit 994e1f4386
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
19 changed files with 154 additions and 74 deletions

View file

@ -16,13 +16,8 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/atomic.h"
#include "libc/intrin/kprintf.h"
#include "libc/limits.h"
#include "libc/thread/thread.h"
#include "third_party/nsync/counter.h"
#include "third_party/nsync/futex.internal.h"
#include "third_party/nsync/time.h"
/**
* Waits for all threads to arrive at barrier.

View file

@ -16,36 +16,43 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
#include "libc/thread/thread.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/spawn.h"
#include "libc/thread/thread.h"
/**
* Asks POSIX thread to free itself automatically on termination.
*
* @return 0 on success, or errno with error
* @raise EINVAL if thread is null or already detached
*/
int pthread_detach(pthread_t thread) {
struct PosixThread *pt;
enum PosixThreadStatus status;
struct PosixThread *pt = (struct PosixThread *)thread;
if (!(pt = (struct PosixThread *)thread)) {
return EINVAL;
}
for (;;) {
status = atomic_load_explicit(&pt->status, memory_order_relaxed);
status = atomic_load_explicit(&pt->status, memory_order_acquire);
if (status == kPosixThreadDetached || status == kPosixThreadZombie) {
// these two states indicate the thread was already detached, in
// which case it's already listed under _pthread_zombies.
break;
return EINVAL;
} else if (status == kPosixThreadTerminated) {
// thread was joinable and finished running. since pthread_join
// won't be called, it's safe to free the thread resources now.
// POSIX says this could be reported as ESRCH but then our test
// code would be less elegant in order for it to avoid flaking.
_pthread_wait(pt);
_pthread_free(pt);
break;
} else if (status == kPosixThreadJoinable) {
if (atomic_compare_exchange_weak_explicit(
&pt->status, &status, kPosixThreadDetached, memory_order_acquire,
&pt->status, &status, kPosixThreadDetached, memory_order_release,
memory_order_relaxed)) {
_pthread_zombies_add(pt);
break;

View file

@ -16,13 +16,10 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/errno.h"
#include "libc/thread/thread.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/thread/posixthread.internal.h"
#include "libc/thread/spawn.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
/**
* Waits for thread to terminate.
@ -31,12 +28,15 @@
* @raise EDEADLK if thread is the calling thread
* @raise EINVAL if thread is null, detached, or a zombie
*/
int pthread_join(pthread_t thread, void **value_ptr) {
struct PosixThread *pt = (struct PosixThread *)thread;
if (pt->status == kPosixThreadDetached || //
pt->status == kPosixThreadZombie) {
assert(!"badjoin");
struct PosixThread *pt;
if (thread == __get_tls()->tib_pthread) {
return EDEADLK;
}
if (!(pt = (struct PosixThread *)thread) || //
pt->status == kPosixThreadZombie || //
pt->status == kPosixThreadDetached) {
return EINVAL;
}
_pthread_wait(pt);
if (value_ptr) {
*value_ptr = pt->rc;

View file

@ -177,19 +177,19 @@ extern const errno_t EBUSY;
#define pthread_spin_lock(pSpin) \
({ \
pthread_spinlock_t *_s = pSpin; \
while (__atomic_test_and_set(&_s->_lock, __ATOMIC_SEQ_CST)) donothing; \
while (__atomic_test_and_set(&_s->_lock, __ATOMIC_ACQUIRE)) donothing; \
0; \
})
#define pthread_spin_unlock(pSpin) \
({ \
pthread_spinlock_t *_s = pSpin; \
__atomic_store_n(&_s->_lock, 0, __ATOMIC_RELAXED); \
__atomic_store_n(&_s->_lock, 0, __ATOMIC_RELEASE); \
0; \
})
#define pthread_spin_trylock(pSpin) \
({ \
pthread_spinlock_t *_s = pSpin; \
__atomic_test_and_set(&_s->_lock, __ATOMIC_SEQ_CST) ? EBUSY : 0; \
__atomic_test_and_set(&_s->_lock, __ATOMIC_ACQUIRE) ? EBUSY : 0; \
})
#endif /* GCC 4.7+ */

View file

@ -28,6 +28,7 @@ LIBC_THREAD_A_DIRECTDEPS = \
LIBC_INTRIN \
LIBC_MEM \
LIBC_NT_KERNEL32 \
LIBC_NT_SYNCHRONIZATION \
LIBC_RUNTIME \
LIBC_STR \
LIBC_STUBS \

View file

@ -16,50 +16,85 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/atomic.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/struct/timespec.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/nt/runtime.h"
#include "libc/nt/synchronization.h"
#include "libc/sysv/consts/futex.h"
#include "libc/thread/thread.h"
#include "libc/thread/wait0.internal.h"
int _futex(atomic_int *, int, int, const struct timespec *);
/**
 * Sleeps for the interval given by `ts`, restoring errno if it fails.
 *
 * @param ts is the duration handed to nanosleep()
 * @return 0 if nanosleep() succeeded, otherwise its nonzero result; the
 *     only failure tolerated is EINTR, since anything else trips the
 *     _npassert() check
 */
static int _wait0_sleep(struct timespec *ts) {
  int saved_errno = errno;
  int rc = nanosleep(ts, 0);
  if (rc) {
    // being interrupted is the one acceptable way for the sleep to fail
    _npassert(errno == EINTR);
    // hide the failure from the caller's view of errno
    errno = saved_errno;
  }
  return rc;
}
/**
 * Performs one step of polling with exponential backoff.
 *
 * Intervals below 1000ns only yield the processor and then double. Any
 * longer interval is slept via _wait0_sleep(); after a sleep that
 * returns zero the interval doubles again, unless it has already
 * reached 100ms. A sleep that returns nonzero leaves the interval as is.
 *
 * @param ts holds the current backoff interval and is updated in place
 */
static void _wait0_poll(struct timespec *ts) {
  if (ts->tv_nsec >= 1000) {
    if (_wait0_sleep(ts)) {
      return;  // sleep didn't complete, so keep the same interval
    }
    if (ts->tv_nsec < 100 * 1000 * 1000) {
      ts->tv_nsec <<= 1;
    }
    return;
  }
  // prefer sched_yield() for small time intervals because nanosleep()
  // will ceiling round to 1ms on the new technology.
  sched_yield();
  ts->tv_nsec <<= 1;
}
/**
 * Issues a single futex-style wait on `a` using `e` as the compare value.
 *
 * On Windows this calls WaitOnAddress() with a timeout argument of -1,
 * reporting success as 0 and failure as the negated GetLastError()
 * code. Elsewhere it calls _futex() with FUTEX_WAIT and no timeout,
 * negating any positive result seen on OpenBSD. The outcome is logged
 * through STRACE() and must be one of 0, -EINTR, -ETIMEDOUT, or
 * -EWOULDBLOCK, otherwise the _npassert() check trips.
 *
 * @param a is the address of the word to wait on
 * @param e is the value the word is expected to hold while waiting
 */
static void _wait0_futex(const atomic_int *a, int e) {
  int rc;
  int op = FUTEX_WAIT;  // we need a shared mutex
  if (!IsWindows()) {
    rc = _futex(a, op, e, 0);
    if (IsOpenbsd() && rc > 0) {
      rc = -rc;  // fold positive OpenBSD results into the negative form
    }
  } else if (WaitOnAddress(a, &e, sizeof(*a), -1)) {
    rc = 0;
  } else {
    rc = -GetLastError();
  }
  STRACE("futex(%t, %s, %d, %s) → %s", a, DescribeFutexOp(op), e, "NULL",
         DescribeFutexResult(rc));
  _npassert(rc == 0 ||          //
            rc == -EINTR ||     //
            rc == -ETIMEDOUT || //
            rc == -EWOULDBLOCK);
}
/**
* Blocks until memory location becomes zero.
*
* This is intended to be used on the child thread id, which is updated
* by the _spawn() system call when a thread terminates. The purpose of
* this operation is to know when it's safe to munmap() a threads stack
* by the clone() system call when a thread terminates. We need this in
* order to know when it's safe to free a thread's stack. This function
* uses futexes on Linux, OpenBSD, and Windows. On other platforms this
* uses polling with exponential backoff.
*/
void _wait0(const atomic_int *ctid) {
int x, rc;
char buf[12];
for (;;) {
if (!(x = atomic_load_explicit(ctid, memory_order_relaxed))) {
break;
} else if (IsLinux() || IsOpenbsd()) {
rc = _futex(ctid, FUTEX_WAIT, x, &(struct timespec){2});
STRACE("futex(%t, FUTEX_WAIT, %d, {2, 0}) → %s", ctid, x,
(DescribeFutexResult)(buf, rc));
if (IsOpenbsd() && rc > 0) rc = -rc;
if (!(rc == 0 || //
rc == -EINTR || //
rc == -ETIMEDOUT || //
rc == -EWOULDBLOCK)) {
notpossible;
}
int x;
struct timespec ts = {0, 1};
while ((x = atomic_load_explicit(ctid, memory_order_acquire))) {
if (IsLinux() || IsOpenbsd() || IsWindows()) {
_wait0_futex(ctid, x);
} else {
pthread_yield();
_wait0_poll(&ts);
}
}
if (IsOpenbsd()) {
// TODO(jart): Why do we need it? It's not even perfect.
// What's up with all these OpenBSD flakes??
pthread_yield();
}
}