Improve pthread_join()

Since we're now on Windows 8, we can have clone() work as advertised on
Windows, where it sends a futex wake to the child tid. It's also likely
we no longer need to work around thread flakes on OpenBSD, in _wait0().
This commit is contained in:
Justine Tunney 2022-09-16 14:02:06 -07:00
parent 3733b43a8f
commit 994e1f4386
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
19 changed files with 154 additions and 74 deletions

View file

@ -16,50 +16,85 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/atomic.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/timespec.h"
#include "libc/calls/struct/timespec.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/atomic.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/nt/runtime.h"
#include "libc/nt/synchronization.h"
#include "libc/sysv/consts/futex.h"
#include "libc/thread/thread.h"
#include "libc/thread/wait0.internal.h"
int _futex(atomic_int *, int, int, const struct timespec *);
static int _wait0_sleep(struct timespec *ts) {
int rc, e = errno;
if ((rc = nanosleep(ts, 0))) {
_npassert(errno == EINTR);
errno = e;
}
return rc;
}
static void _wait0_poll(struct timespec *ts) {
if (ts->tv_nsec < 1000) {
// prefer sched_yield() for small time intervals because nanosleep()
// will ceiling round to 1ms on the new technology.
sched_yield();
ts->tv_nsec <<= 1;
} else if (!_wait0_sleep(ts)) {
if (ts->tv_nsec < 100 * 1000 * 1000) {
ts->tv_nsec <<= 1;
}
}
}
static void _wait0_futex(const atomic_int *a, int e) {
int rc, op;
op = FUTEX_WAIT; // we need a shared mutex
if (IsWindows()) {
if (WaitOnAddress(a, &e, sizeof(*a), -1)) {
rc = 0;
} else {
rc = -GetLastError();
}
} else {
rc = _futex(a, op, e, 0);
if (IsOpenbsd() && rc > 0) {
rc = -rc;
}
}
STRACE("futex(%t, %s, %d, %s) → %s", a, DescribeFutexOp(op), e, "NULL",
DescribeFutexResult(rc));
_npassert(rc == 0 || //
rc == -EINTR || //
rc == -ETIMEDOUT || //
rc == -EWOULDBLOCK);
}
/**
* Blocks until memory location becomes zero.
*
* This is intended to be used on the child thread id, which is updated
* by the _spawn() system call when a thread terminates. The purpose of
* this operation is to know when it's safe to munmap() a threads stack
* by the clone() system call when a thread terminates. We need this in
* order to know when it's safe to free a thread's stack. This function
* uses futexes on Linux, OpenBSD, and Windows. On other platforms this
* uses polling with exponential backoff.
*/
void _wait0(const atomic_int *ctid) {
int x, rc;
char buf[12];
for (;;) {
if (!(x = atomic_load_explicit(ctid, memory_order_relaxed))) {
break;
} else if (IsLinux() || IsOpenbsd()) {
rc = _futex(ctid, FUTEX_WAIT, x, &(struct timespec){2});
STRACE("futex(%t, FUTEX_WAIT, %d, {2, 0}) → %s", ctid, x,
(DescribeFutexResult)(buf, rc));
if (IsOpenbsd() && rc > 0) rc = -rc;
if (!(rc == 0 || //
rc == -EINTR || //
rc == -ETIMEDOUT || //
rc == -EWOULDBLOCK)) {
notpossible;
}
int x;
struct timespec ts = {0, 1};
while ((x = atomic_load_explicit(ctid, memory_order_acquire))) {
if (IsLinux() || IsOpenbsd() || IsWindows()) {
_wait0_futex(ctid, x);
} else {
pthread_yield();
_wait0_poll(&ts);
}
}
if (IsOpenbsd()) {
// TODO(jart): Why do we need it? It's not even perfect.
// What's up with all these OpenBSD flakes??
pthread_yield();
}
}