Make fixes and improvements

- clock_nanosleep() is now much faster on OpenBSD and NetBSD
- Thread joining is now much faster on NetBSD
- FreeBSD timestamps are now more accurate
- Thread spawning now goes faster on XNU
- Clean up the clone() code
This commit is contained in:
Justine Tunney 2022-11-08 10:09:47 -08:00
parent aee50b1327
commit b407327972
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
47 changed files with 645 additions and 306 deletions

View file

@ -28,7 +28,7 @@
// @param r8 is tls
// @param r9 is func(void*,int)→int
// @param 8(rsp) is arg
// @return tid of child on success, or -1 w/ errno
// @return tid of child on success, or -errno on error
sys_clone_linux:
push %rbp
mov %rsp,%rbp
@ -39,13 +39,9 @@ sys_clone_linux:
syscall
test %rax,%rax
jz 2f
cmp $-4095,%rax
jae 1f
0: pop %rbx
pop %rbp
ret
1: call systemfive_error
jmp 0b
2: xor %ebp,%ebp # child thread
mov %rbx,%rdi # arg
mov (%r10),%esi # tid

26
libc/runtime/clone-xnu.S Normal file
View file

@ -0,0 +1,26 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
sys_clone_xnu:
mov $0x02000168,%eax # bsdthread_create
mov %rcx,%r10
syscall
ret
.endfn sys_clone_xnu,globl,hidden

View file

@ -16,12 +16,14 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/ucontext-netbsd.internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/asan.internal.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/limits.h"
#include "libc/macros.internal.h"
@ -32,6 +34,7 @@
#include "libc/runtime/clone.internal.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h"
#include "libc/sock/internal.h"
#include "libc/stdalign.internal.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clone.h"
@ -66,7 +69,6 @@ struct CloneArgs {
uint32_t utid;
int64_t tid64;
};
pthread_spinlock_t lock;
int *ptid;
int *ctid;
int *ztid;
@ -99,7 +101,6 @@ WinThreadEntry(int rdi, // rcx
struct CloneArgs *wt) { // r9
int rc;
if (wt->tls) __set_tls_win32(wt->tls);
*wt->ptid = wt->tid;
*wt->ctid = wt->tid;
rc = WinThreadLaunch(wt->arg, wt->tid, wt->func, (intptr_t)wt & -16);
// we can now clear ctid directly since we're no longer using our own
@ -112,15 +113,14 @@ WinThreadEntry(int rdi, // rcx
notpossible;
}
static textwindows int CloneWindows(int (*func)(void *, int), char *stk,
size_t stksz, int flags, void *arg,
void *tls, int *ptid, int *ctid) {
static textwindows errno_t CloneWindows(int (*func)(void *, int), char *stk,
size_t stksz, int flags, void *arg,
void *tls, int *ptid, int *ctid) {
int64_t h;
struct CloneArgs *wt;
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
sizeof(struct CloneArgs)) &
-alignof(struct CloneArgs));
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->func = func;
@ -128,9 +128,12 @@ static textwindows int CloneWindows(int (*func)(void *, int), char *stk,
wt->tls = flags & CLONE_SETTLS ? tls : 0;
if ((h = CreateThread(0, 4096, (void *)WinThreadEntry, wt, 0, &wt->utid))) {
CloseHandle(h);
return wt->tid;
if (flags & CLONE_PARENT_SETTID) {
*ptid = wt->tid;
}
return 0;
} else {
return -1;
return __dos2errno(GetLastError());
}
}
@ -161,9 +164,8 @@ XnuThreadMain(void *pthread, // rdi
unsigned xnuflags) { // r9
int ax;
wt->tid = tid;
*wt->ptid = tid;
*wt->ctid = tid;
pthread_spin_unlock(&wt->lock);
*wt->ptid = tid;
if (wt->tls) {
// XNU uses the same 0x30 offset as the WIN32 TIB x64. They told the
@ -192,23 +194,16 @@ XnuThreadMain(void *pthread, // rdi
notpossible;
}
static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
void *arg, void *tls, int *ptid, int *ctid) {
static errno_t CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
void *arg, void *tls, int *ptid, int *ctid) {
int rc;
bool failed;
static bool once;
static int broken;
struct CloneArgs *wt;
if (!once) {
if (sys_bsdthread_register(XnuThreadThunk, 0, 0, 0, 0, 0, 0) == -1) {
broken = errno;
}
_npassert(sys_bsdthread_register(XnuThreadThunk, 0, 0, 0, 0, 0, 0) != -1);
once = true;
}
if (broken) {
errno = broken;
return -1;
}
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
sizeof(struct CloneArgs)) &
-alignof(struct CloneArgs));
@ -216,14 +211,7 @@ static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->tls = flags & CLONE_SETTLS ? tls : 0;
wt->lock._lock = 1;
if ((rc = sys_bsdthread_create(fn, arg, wt, 0, PTHREAD_START_CUSTOM_XNU)) !=
-1) {
pthread_spin_lock(&wt->lock);
rc = wt->tid;
pthread_spin_unlock(&wt->lock);
}
return rc;
return sys_clone_xnu(fn, arg, wt, 0, PTHREAD_START_CUSTOM_XNU);
}
////////////////////////////////////////////////////////////////////////////////
@ -231,7 +219,6 @@ static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
static wontreturn void FreebsdThreadMain(void *p) {
struct CloneArgs *wt = p;
*wt->ptid = wt->tid;
*wt->ctid = wt->tid;
wt->func(wt->arg, wt->tid);
// we no longer use the stack after this point
@ -249,8 +236,9 @@ static wontreturn void FreebsdThreadMain(void *p) {
notpossible;
}
static int CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz,
int flags, void *arg, void *tls, int *ptid, int *ctid) {
static errno_t CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz,
int flags, void *arg, void *tls, int *ptid,
int *ctid) {
int ax;
bool failed;
int64_t tid;
@ -258,7 +246,6 @@ static int CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz,
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
sizeof(struct CloneArgs)) &
-alignof(struct CloneArgs));
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->tls = tls;
@ -278,11 +265,9 @@ static int CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz,
: CFLAG_CONSTRAINT(failed), "=a"(ax)
: "1"(__NR_thr_new), "D"(&params), "S"(sizeof(params))
: "rcx", "rdx", "r8", "r9", "r10", "r11", "memory");
if (failed) {
errno = ax;
tid = -1;
}
return tid;
if (failed) return ax;
if (flags & CLONE_PARENT_SETTID) *ptid = tid;
return 0;
}
////////////////////////////////////////////////////////////////////////////////
@ -292,7 +277,6 @@ static int CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz,
// 1. __asan_handle_no_return wipes stack [todo?]
noasan static wontreturn void OpenbsdThreadMain(void *p) {
struct CloneArgs *wt = p;
*wt->ptid = wt->tid;
*wt->ctid = wt->tid;
wt->func(wt->arg, wt->tid);
asm volatile("mov\t%2,%%rsp\n\t" // so syscall can validate stack exists
@ -308,9 +292,10 @@ noasan static wontreturn void OpenbsdThreadMain(void *p) {
notpossible;
}
static int CloneOpenbsd(int (*func)(void *, int), char *stk, size_t stksz,
int flags, void *arg, void *tls, int *ptid, int *ctid) {
int tid;
static errno_t CloneOpenbsd(int (*func)(void *, int), char *stk, size_t stksz,
int flags, void *arg, void *tls, int *ptid,
int *ctid) {
int rc;
intptr_t sp;
struct __tfork *tf;
struct CloneArgs *wt;
@ -321,7 +306,6 @@ static int CloneOpenbsd(int (*func)(void *, int), char *stk, size_t stksz,
sp -= sizeof(struct CloneArgs);
sp &= -MAX(16, alignof(struct CloneArgs));
wt = (struct CloneArgs *)sp;
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->arg = arg;
@ -329,11 +313,15 @@ static int CloneOpenbsd(int (*func)(void *, int), char *stk, size_t stksz,
tf->tf_stack = (char *)wt - 8;
tf->tf_tcb = flags & CLONE_SETTLS ? tls : 0;
tf->tf_tid = &wt->tid;
if ((tid = __tfork_thread(tf, sizeof(*tf), OpenbsdThreadMain, wt)) < 0) {
errno = -tid;
tid = -1;
if ((rc = __tfork_thread(tf, sizeof(*tf), OpenbsdThreadMain, wt)) >= 0) {
_npassert(rc);
if (flags & CLONE_PARENT_SETTID) {
*ptid = rc;
}
return 0;
} else {
return -rc;
}
return tid;
}
////////////////////////////////////////////////////////////////////////////////
@ -343,8 +331,7 @@ static wontreturn void NetbsdThreadMain(void *arg, // rdi
int (*func)(void *, int), // rsi
int *tid, // rdx
int *ctid, // rcx
int *ztid, // r8
int *ptid) { // r9
int *ztid) { // r9
int ax, dx;
// TODO(jart): Why are we seeing flakes where *tid is zero?
// ax = *tid;
@ -370,7 +357,6 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
int ax, *tid;
intptr_t dx, sp;
static bool once;
static int broken;
struct ucontext_netbsd *ctx;
static struct ucontext_netbsd netbsd_clone_template;
@ -380,15 +366,9 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
: CFLAG_CONSTRAINT(failed), "=a"(ax)
: "1"(__NR_getcontext_netbsd), "D"(&netbsd_clone_template)
: "rcx", "rdx", "r8", "r9", "r10", "r11", "memory");
if (failed) {
broken = ax;
}
_npassert(!failed);
once = true;
}
if (broken) {
errno = broken;
return -1;
}
sp = (intptr_t)(stk + stksz);
// allocate memory for tid
@ -420,7 +400,6 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
ctx->uc_mcontext.rdx = (intptr_t)tid;
ctx->uc_mcontext.rcx = (intptr_t)(flags & CLONE_CHILD_SETTID ? ctid : tid);
ctx->uc_mcontext.r8 = (intptr_t)(flags & CLONE_CHILD_CLEARTID ? ctid : tid);
ctx->uc_mcontext.r9 = (intptr_t)(flags & CLONE_PARENT_SETTID ? ptid : tid);
ctx->uc_flags |= _UC_STACK;
ctx->uc_stack.ss_sp = stk;
ctx->uc_stack.ss_size = stksz;
@ -436,10 +415,13 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
: "1"(__NR__lwp_create), "D"(ctx), "S"(LWP_DETACHED), "2"(tid)
: "rcx", "r8", "r9", "r10", "r11", "memory");
if (!failed) {
return *tid;
_npassert(*tid);
if (flags & CLONE_PARENT_SETTID) {
*ptid = *tid;
}
return 0;
} else {
errno = ax;
return -1;
return ax;
}
}
@ -454,8 +436,9 @@ int sys_clone_linux(int flags, // rdi
void *func, // r9
void *arg); // 8(rsp)
static int CloneLinux(int (*func)(void *arg, int tid), char *stk, size_t stksz,
static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz,
int flags, void *arg, void *tls, int *ptid, int *ctid) {
int rc;
long sp;
sp = (intptr_t)(stk + stksz);
if (~flags & CLONE_CHILD_SETTID) {
@ -465,7 +448,12 @@ static int CloneLinux(int (*func)(void *arg, int tid), char *stk, size_t stksz,
ctid = (int *)sp;
}
sp = sp & -16; // align the stack
return sys_clone_linux(flags, sp, ptid, ctid, tls, func, arg);
if ((rc = sys_clone_linux(flags, sp, ptid, ctid, tls, func, arg)) >= 0) {
// clone() is documented as setting ptid before return
return 0;
} else {
return -rc;
}
}
////////////////////////////////////////////////////////////////////////////////
@ -478,14 +466,19 @@ static int CloneLinux(int (*func)(void *arg, int tid), char *stk, size_t stksz,
*
* int worker(void *arg) { return 0; }
* struct CosmoTib tib = {.tib_self = &tib, .tib_tid = -1};
* atomic_int tid;
* char *stk = _mapstack();
* tid = clone(worker, stk, GetStackSize() - 16,
* CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES |
* CLONE_SIGHAND | CLONE_CHILD_SETTID |
* CLONE_CHILD_CLEARTID | CLONE_SETTLS,
* arg, 0, &tib, &tib.tib_tid);
* // ...
* while (atomic_load(&tib.tib_tid)) sched_yield();
* clone(worker, stk, GetStackSize() - 16,
* CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES |
* CLONE_SIGHAND | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
* CLONE_CHILD_CLEARTID | CLONE_SETTLS,
* arg, &tid, &tib, &tib.tib_tid);
* while (atomic_load(&tid) == 0) sched_yield();
* // thread is known
* while (atomic_load(&tib.tib_tid) < 0) sched_yield();
* // thread is running
* while (atomic_load(&tib.tib_tid) > 0) sched_yield();
* // thread has terminated
* _freestack(stk);
*
* Threads are created in a detached manner. They currently can't be
@ -543,14 +536,14 @@ static int CloneLinux(int (*func)(void *arg, int tid), char *stk, size_t stksz,
* Your `flags` may also optionally also additionally bitwise-OR any
* combination of the following additional flags:
*
* - `CLONE_PARENT_SETTID` must be specified if you intend to set
* the `ptid` argument, which is guaranteed to be updated with the
* child tid BEFORE BOTH clone() returns and `func` is invoked
*
* - `CLONE_CHILD_SETTID` must be specified if you intend to set the
* `ctid` argument, which is guaranteed to be updated with the
* child tid before `func` is called, however we CAN NOT guarantee
* this will happen BEFORE clone() returns
* `ctid` argument, which will updated with the child tid once the
* child has started.
*
* - `CLONE_PARENT_SETTID` must be specified if you intend to set
* the `ptid` argument, which is updated at the most opportune
* moment. On all platforms except XNU, this happens before
* clone() returns. On XNU, it happens once the thread starts.
*
* - `CLONE_CHILD_CLEARTID` causes `*ctid = 0` upon child thread
* termination. This is used to implement join so that the parent
@ -568,11 +561,11 @@ static int CloneLinux(int (*func)(void *arg, int tid), char *stk, size_t stksz,
* this parameter is ignored if `CLONE_SETTLS` is not set
* @param ctid lets the child receive its thread id without having to
* call gettid() and is ignored if `CLONE_CHILD_SETTID` isn't set
* @return tid of child on success, or -1 w/ errno
* @return 0 on success, or errno on errno
* @threadsafe
*/
int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid,
void *tls, void *ctid) {
errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg,
void *ptid, void *tls, void *ctid) {
int rc;
if (flags & CLONE_THREAD) {
@ -580,17 +573,17 @@ int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid,
}
if (!func) {
rc = einval();
rc = EINVAL;
} else if (!IsTiny() &&
((flags & CLONE_VM) && (stksz < PAGESIZE || (stksz & 15)))) {
rc = einval();
rc = EINVAL;
} else if (IsAsan() &&
(((flags & CLONE_SETTLS) && !__asan_is_valid(tls, 64)) ||
((flags & CLONE_PARENT_SETTID) &&
!__asan_is_valid(ptid, sizeof(*ptid))) ||
!__asan_is_valid(ptid, sizeof(int))) ||
((flags & CLONE_CHILD_SETTID) &&
!__asan_is_valid(ctid, sizeof(int))))) {
rc = efault();
rc = EFAULT;
} else if (IsLinux()) {
rc = CloneLinux(func, stk, stksz, flags, arg, tls, ptid, ctid);
} else if (!IsTiny() &&
@ -599,7 +592,7 @@ int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid,
(CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND)) {
STRACE("clone flag unsupported on this platform");
rc = einval();
rc = EINVAL;
} else if (IsXnu()) {
rc = CloneXnu(func, stk, stksz, flags, arg, tls, ptid, ctid);
} else if (IsFreebsd()) {
@ -611,16 +604,11 @@ int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid,
} else if (IsWindows()) {
rc = CloneWindows(func, stk, stksz, flags, arg, tls, ptid, ctid);
} else {
rc = enosys();
rc = ENOSYS;
}
// TODO(jart): do we need it?
if (rc != -1 && (flags & CLONE_PARENT_SETTID)) {
*ptid = rc;
}
STRACE("clone(%t, %p, %'zu, %#x, %p, %p, %p, %p) → %d% m", func, stk, stksz,
flags, arg, ptid, tls, ctid, rc);
STRACE("clone(%t, %p, %'zu, %#x, %p, %p, %p, %p) → %s", func, stk, stksz,
flags, arg, ptid, tls, ctid, DescribeErrnoResult(rc));
return rc;
}

View file

@ -4,7 +4,7 @@
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
int clone(void *, void *, size_t, int, void *, int *, void *, void *);
int clone(void *, void *, size_t, int, void *, void *, void *, void *);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -117,8 +117,8 @@ void __enable_tls(void) {
tid = sys_gettid();
}
atomic_store_explicit(&tib->tib_tid, tid, memory_order_relaxed);
_pthread_main.ptid = tid;
_pthread_main.tib = tib;
_pthread_main.tid = tid;
_pthread_main.flags = PT_MAINTHREAD;
__repmovsb(tls, _tdata_start, _TLDZ);

View file

@ -34,7 +34,9 @@
#include "libc/thread/tls.h"
int _fork(uint32_t dwCreationFlags) {
int ax, dx, parent;
struct CosmoTib *tib;
struct PosixThread *pt;
int ax, dx, tid, parent;
BLOCK_SIGNALS;
if (__threaded && _weaken(_pthread_onfork_prepare)) {
_weaken(_pthread_onfork_prepare)();
@ -53,9 +55,12 @@ int _fork(uint32_t dwCreationFlags) {
parent = __pid;
__pid = dx;
if (__tls_enabled) {
atomic_store_explicit(&__get_tls()->tib_tid,
IsLinux() ? dx : sys_gettid(),
memory_order_relaxed);
tib = __get_tls();
tid = IsLinux() ? dx : sys_gettid();
atomic_store_explicit(&tib->tib_tid, tid, memory_order_relaxed);
if ((pt = (struct PosixThread *)tib->tib_pthread)) {
atomic_store_explicit(&pt->ptid, tid, memory_order_relaxed);
}
}
if (__threaded && _weaken(_pthread_onfork_child)) {
_weaken(_pthread_onfork_child)();