mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-12 01:08:00 +00:00
620 lines
21 KiB
C
620 lines
21 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8                               :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2021 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for         │
│ any purpose with or without fee is hereby granted, provided that the         │
│ above copyright notice and this permission notice appear in all copies.      │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
│ PERFORMANCE OF THIS SOFTWARE.                                                │
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/atomic.h"
|
|
#include "libc/calls/state.internal.h"
|
|
#include "libc/calls/struct/ucontext-netbsd.internal.h"
|
|
#include "libc/dce.h"
|
|
#include "libc/intrin/asmflag.h"
|
|
#include "libc/intrin/atomic.h"
|
|
#include "libc/intrin/ulock.h"
|
|
#include "libc/limits.h"
|
|
#include "libc/mem/alloca.h"
|
|
#include "libc/nt/enum/processcreationflags.h"
|
|
#include "libc/nt/runtime.h"
|
|
#include "libc/nt/synchronization.h"
|
|
#include "libc/nt/thread.h"
|
|
#include "libc/nt/thunk/msabi.h"
|
|
#include "libc/runtime/runtime.h"
|
|
#include "libc/runtime/syslib.internal.h"
|
|
#include "libc/sock/internal.h"
|
|
#include "libc/sysv/consts/arch.h"
|
|
#include "libc/thread/freebsd.internal.h"
|
|
#include "libc/thread/openbsd.internal.h"
|
|
#include "libc/thread/posixthread.internal.h"
|
|
#include "libc/thread/xnu.internal.h"
|
|
|
|
// CloneSilicon() hands out synthetic thread ids computed as
// kMinThreadId + (counter % kMaxThreadIds)
#define kMaxThreadIds 32768
#define kMinThreadId  262144

// selector codes passed as the first argument of sys_set_tls();
// FreebsdThreadMain() uses AMD64_SET_GSBASE on x86-64
#define AMD64_SET_FSBASE 129
#define AMD64_SET_GSBASE 131

// raw system call numbers and flag bits used by the per-OS backends below
#define __NR_thr_new                      455         // used by CloneFreebsd()
#define __NR_clone_linux                  56
#define __NR__lwp_create                  309         // used by CloneNetbsd()
#define __NR_getcontext_netbsd            307         // used by CloneNetbsd()
#define __NR_bsdthread_create             0x02000168
#define __NR_thread_fast_set_cthread_self 0x03000003  // used by XnuThreadMain()
#define PTHREAD_START_CUSTOM_XNU          0x01000000  // flag given to sys_clone_xnu()
#define LWP_DETACHED                      0x00000040  // flag given to _lwp_create
#define LWP_SUSPENDED                     0x00000080
|
|
|
|
/**
 * Startup parameters handed from the spawning thread to the new thread.
 *
 * The backends below that use this structure carve it out of the top of
 * the stack memory supplied by the caller of clone().
 */
struct CloneArgs {
  union {
    long sp;        // initial stack pointer (Windows and Apple Silicon backends)
    int64_t tid64;  // FreeBSD backend: target of thr_param::child_tid
  };
  atomic_int *ptid;     // receives the new thread's id
  atomic_int *ctid;     // receives the thread id; zeroed when the thread exits
  char *tls;            // thread-local storage base for the new thread
  int (*func)(void *);  // function the new thread runs
  void *arg;            // argument passed to func
};
|
|
|
|
// Defined elsewhere. Callers in this file pass an architecture-specific
// selector (AMD64_SET_GSBASE, ARCH_SET_GS) followed by the TLS base address.
int sys_set_tls(uintptr_t op, void *tls);

// Defined elsewhere. Callers in this file pass the thread argument, the thread
// id, two zeroes, the thread function, and the stack pointer to run it on.
// NOTE(review): meaning of the 3rd/4th parameters isn't visible here — confirm.
int __stack_call(void *arg, int tid, long arg3, long arg4, int (*func)(void *),
                 long sp);
|
|
|
|
#ifdef __x86_64__
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// THE NEW TECHNOLOGY
|
|
|
|
__msabi extern typeof(ExitThread) *const __imp_ExitThread;
|
|
__msabi extern typeof(GetCurrentThreadId) *const __imp_GetCurrentThreadId;
|
|
__msabi extern typeof(WakeByAddressAll) *const __imp_WakeByAddressAll;
|
|
|
|
textwindows dontinstrument wontreturn static void //
|
|
WinThreadEntry(int rdi, // rcx
|
|
int rsi, // rdx
|
|
int rdx, // r8
|
|
struct CloneArgs *wt) { // r9
|
|
__set_tls_win32(wt->tls);
|
|
int tid = __imp_GetCurrentThreadId();
|
|
atomic_int *ctid = wt->ctid;
|
|
atomic_init(ctid, tid);
|
|
atomic_init(wt->ptid, tid);
|
|
int rc = __stack_call(wt->arg, tid, 0, 0, wt->func, wt->sp);
|
|
// we can now clear ctid directly since we're no longer using our own
|
|
// stack memory, which can now be safely free'd by the parent thread.
|
|
atomic_store_explicit(ctid, 0, memory_order_release);
|
|
__imp_WakeByAddressAll(ctid);
|
|
// since we didn't indirect this function through NT2SYSV() it's not
|
|
// safe to simply return, and as such, we need ExitThread().
|
|
__imp_ExitThread(rc);
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
textwindows static errno_t CloneWindows(int (*func)(void *), char *stk,
|
|
size_t stksz, void *arg, void *tls,
|
|
atomic_int *ptid, atomic_int *ctid) {
|
|
long sp;
|
|
int64_t h;
|
|
intptr_t tip;
|
|
uint32_t utid;
|
|
struct CloneArgs *wt;
|
|
sp = tip = (intptr_t)stk + stksz;
|
|
sp -= sizeof(struct CloneArgs);
|
|
sp &= -alignof(struct CloneArgs);
|
|
wt = (struct CloneArgs *)sp;
|
|
wt->ctid = ctid;
|
|
wt->ptid = ptid;
|
|
wt->func = func;
|
|
wt->arg = arg;
|
|
wt->tls = tls;
|
|
wt->sp = tip & -16;
|
|
if ((h = CreateThread(0, 65536, (void *)WinThreadEntry, wt,
|
|
kNtStackSizeParamIsAReservation, &utid))) {
|
|
atomic_init(ptid, utid);
|
|
struct CosmoTib *tib = tls;
|
|
atomic_store_explicit(&tib->tib_syshand, h, memory_order_release);
|
|
return 0;
|
|
} else {
|
|
return __dos2errno(GetLastError());
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// XNU'S NOT UNIX
|
|
|
|
void XnuThreadThunk(void *pthread, // rdi x0
|
|
int machport, // rsi x1
|
|
void *(*func)(void *), // rdx x2
|
|
void *arg, // rcx x3
|
|
intptr_t *stack, // r8 x4
|
|
unsigned xnuflags); // r9 x5
|
|
asm("XnuThreadThunk:\n\t"
|
|
"xor\t%ebp,%ebp\n\t"
|
|
"mov\t%r8,%rsp\n\t"
|
|
"push\t%rax\n\t"
|
|
"jmp\tXnuThreadMain\n\t"
|
|
".size\tXnuThreadThunk,.-XnuThreadThunk");
|
|
__attribute__((__used__))
|
|
|
|
dontinstrument wontreturn static void
|
|
XnuThreadMain(void *pthread, // rdi
|
|
int tid, // rsi
|
|
int (*func)(void *arg), // rdx
|
|
void *arg, // rcx
|
|
struct CloneArgs *wt, // r8
|
|
unsigned xnuflags) { // r9
|
|
atomic_init(wt->ctid, tid);
|
|
atomic_init(wt->ptid, tid);
|
|
|
|
// XNU uses the same 0x30 offset as the WIN32 TIB x64. They told the
|
|
// Go team at Google that they Apply stands by our ability to use it
|
|
// https://github.com/golang/go/issues/23617#issuecomment-376662373
|
|
int ax;
|
|
asm volatile("syscall"
|
|
: "=a"(ax)
|
|
: "0"(__NR_thread_fast_set_cthread_self), "D"(wt->tls - 0x30)
|
|
: "rcx", "rdx", "r8", "r9", "r10", "r11", "memory", "cc");
|
|
|
|
func(arg);
|
|
|
|
// we no longer use the stack after this point
|
|
// %rax = int bsdthread_terminate(%rdi = void *stackaddr,
|
|
// %rsi = size_t freesize,
|
|
// %rdx = uint32_t port,
|
|
// %r10 = uint32_t sem);
|
|
asm volatile("movl\t$0,(%%rsi)\n\t" // *wt->ctid = 0
|
|
"mov\t$0x101,%%edi\n\t" // wake all
|
|
"xor\t%%edx,%%edx\n\t" // wake_value
|
|
"mov\t$0x02000204,%%eax\n\t" // ulock_wake()
|
|
"syscall\n\t" //
|
|
"xor\t%%edi,%%edi\n\t" // freeaddr
|
|
"xor\t%%esi,%%esi\n\t" // freesize
|
|
"xor\t%%edx,%%edx\n\t" // kport
|
|
"xor\t%%r10d,%%r10d\n\t" // joinsem
|
|
"mov\t$0x02000169,%%eax\n\t" // bsdthread_terminate()
|
|
"syscall"
|
|
: /* no outputs */
|
|
: "S"(wt->ctid)
|
|
: "rax", "rcx", "r10", "r11", "memory");
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
static errno_t CloneXnu(int (*fn)(void *), char *stk, size_t stksz, void *arg,
|
|
void *tls, atomic_int *ptid, atomic_int *ctid) {
|
|
|
|
// perform this weird mandatory system call once
|
|
static bool once;
|
|
if (!once) {
|
|
sys_bsdthread_register(XnuThreadThunk, 0, 0, 0, 0, 0, 0);
|
|
once = true;
|
|
}
|
|
|
|
// setup stack for thread
|
|
long sp;
|
|
struct CloneArgs *wt;
|
|
sp = (intptr_t)stk + stksz;
|
|
sp -= sizeof(struct CloneArgs);
|
|
sp &= -alignof(struct CloneArgs);
|
|
wt = (struct CloneArgs *)sp;
|
|
sp &= -16;
|
|
|
|
// pass parameters to new thread via xnu
|
|
wt->ctid = ctid;
|
|
wt->ptid = ptid;
|
|
wt->tls = tls;
|
|
return sys_clone_xnu(fn, arg, wt, 0, PTHREAD_START_CUSTOM_XNU);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// OPEN BESIYATA DISHMAYA
|
|
|
|
// we can't use address sanitizer because:
|
|
// 1. __asan_handle_no_return wipes stack [todo?]
|
|
relegated dontinstrument wontreturn static void OpenbsdThreadMain(void *p) {
|
|
struct CloneArgs *wt = p;
|
|
int tid = atomic_load_explicit(wt->ctid, memory_order_relaxed);
|
|
atomic_init(wt->ptid, tid);
|
|
wt->func(wt->arg);
|
|
asm volatile("mov\t%1,%%rsp\n\t" // so syscall can validate stack exists
|
|
"movl\t$0,(%2)\n\t" // *wt->ctid = 0 (old stack now free'd)
|
|
"syscall\n\t" // futex(int*, op, val) will wake wait0
|
|
"xor\t%%edi,%%edi\n\t" // so kernel doesn't write to old stack
|
|
"mov\t$302,%%eax\n\t" // __threxit(int *notdead) doesn't wake
|
|
"syscall"
|
|
: /* no outputs */
|
|
: "a"(83), "m"(__oldstack), "D"(wt->ctid),
|
|
"S"(2 /* FUTEX_WAKE */), "d"(INT_MAX)
|
|
: "rcx", "r11", "memory");
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
relegated static errno_t CloneOpenbsd(int (*func)(void *), char *stk,
|
|
size_t stksz, void *arg, void *tls,
|
|
atomic_int *ptid, atomic_int *ctid) {
|
|
int rc;
|
|
intptr_t sp;
|
|
struct __tfork *tf;
|
|
struct CloneArgs *wt;
|
|
sp = (intptr_t)stk + stksz;
|
|
sp -= sizeof(struct __tfork);
|
|
sp &= -alignof(struct __tfork);
|
|
tf = (struct __tfork *)sp;
|
|
sp -= sizeof(struct CloneArgs);
|
|
sp &= -alignof(struct CloneArgs);
|
|
wt = (struct CloneArgs *)sp;
|
|
sp &= -16;
|
|
sp -= 8;
|
|
*(intptr_t *)sp = (intptr_t)CloneOpenbsd + 1;
|
|
wt->ctid = ctid;
|
|
wt->ptid = ptid;
|
|
wt->arg = arg;
|
|
wt->func = func;
|
|
tf->tf_stack = (char *)sp;
|
|
tf->tf_tcb = tls;
|
|
tf->tf_tid = ctid;
|
|
if ((rc = __tfork_thread(tf, sizeof(*tf), OpenbsdThreadMain, wt)) >= 0) {
|
|
atomic_init(ptid, rc);
|
|
return 0;
|
|
} else {
|
|
return -rc;
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// NET BESIYATA DISHMAYA
|
|
|
|
wontreturn dontinstrument static void NetbsdThreadMain(
|
|
void *arg, // rdi
|
|
int (*func)(void *), // rsi
|
|
atomic_int *ctid, // rdx
|
|
atomic_int *ptid) { // rcx
|
|
int ax;
|
|
asm("syscall"
|
|
: "=a"(ax) // man says always succeeds
|
|
: "0"(311) // _lwp_self()
|
|
: "rcx", "rdx", "r8", "r9", "r10", "r11", "memory", "cc");
|
|
atomic_init(ctid, ax);
|
|
atomic_init(ptid, ax);
|
|
func(arg);
|
|
// we no longer use the stack after this point
|
|
// %eax = int __lwp_exit(void);
|
|
asm volatile("movl\t$0,(%2)\n\t" // *ztid = 0
|
|
"syscall" // __lwp_exit()
|
|
: "=a"(ax)
|
|
: "0"(310), "r"(ctid)
|
|
: "rcx", "r11", "memory");
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, void *arg,
|
|
void *tls, atomic_int *ptid, atomic_int *ctid) {
|
|
// NetBSD has its own clone() and it works, but it's technically a
|
|
// second-class API, intended to help Linux folks migrate to this.
|
|
int ax;
|
|
bool failed;
|
|
intptr_t dx, sp;
|
|
static bool once;
|
|
struct ucontext_netbsd *ctx;
|
|
static struct ucontext_netbsd netbsd_clone_template;
|
|
|
|
// memoize arbitrary valid processor state structure
|
|
if (!once) {
|
|
asm volatile(CFLAG_ASM("syscall")
|
|
: CFLAG_CONSTRAINT(failed), "=a"(ax)
|
|
: "1"(__NR_getcontext_netbsd), "D"(&netbsd_clone_template)
|
|
: "rcx", "rdx", "r8", "r9", "r10", "r11", "memory");
|
|
once = true;
|
|
}
|
|
sp = (intptr_t)stk + stksz;
|
|
|
|
// align the stack
|
|
sp &= -16;
|
|
|
|
// simulate call to misalign stack and ensure backtrace looks good
|
|
sp -= 8;
|
|
*(intptr_t *)sp = (intptr_t)CloneNetbsd + 1;
|
|
|
|
// place the giant 784 byte ucontext structure in the red zone!
|
|
// it only has to live long enough for the thread to come alive
|
|
ctx = (struct ucontext_netbsd *)((sp - sizeof(struct ucontext_netbsd)) & -64);
|
|
|
|
// pass parameters in process state
|
|
memcpy(ctx, &netbsd_clone_template, sizeof(*ctx));
|
|
ctx->uc_link = 0;
|
|
ctx->uc_mcontext.rbp = 0;
|
|
ctx->uc_mcontext.rsp = sp;
|
|
ctx->uc_mcontext.rip = (intptr_t)NetbsdThreadMain;
|
|
ctx->uc_mcontext.rdi = (intptr_t)arg;
|
|
ctx->uc_mcontext.rsi = (intptr_t)func;
|
|
ctx->uc_mcontext.rdx = (intptr_t)ctid;
|
|
ctx->uc_mcontext.rcx = (intptr_t)ptid;
|
|
ctx->uc_flags |= _UC_STACK;
|
|
ctx->uc_stack.ss_sp = stk;
|
|
ctx->uc_stack.ss_size = stksz;
|
|
ctx->uc_stack.ss_flags = 0;
|
|
ctx->uc_flags |= _UC_TLSBASE;
|
|
ctx->uc_mcontext._mc_tlsbase = (intptr_t)tls;
|
|
|
|
// perform the system call
|
|
int tid = 0;
|
|
asm volatile(CFLAG_ASM("syscall")
|
|
: CFLAG_CONSTRAINT(failed), "=a"(ax), "=d"(dx)
|
|
: "1"(__NR__lwp_create), "D"(ctx), "S"(LWP_DETACHED), "2"(&tid)
|
|
: "rcx", "r8", "r9", "r10", "r11", "memory");
|
|
if (!failed) {
|
|
atomic_init(ptid, tid);
|
|
return 0;
|
|
} else {
|
|
return ax;
|
|
}
|
|
}
|
|
|
|
#endif /* __x86_64__ */
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// FREE BESIYATA DISHMAYA
|
|
|
|
wontreturn dontinstrument static void FreebsdThreadMain(void *p) {
|
|
struct CloneArgs *wt = p;
|
|
#ifdef __aarch64__
|
|
asm volatile("mov\tx28,%0" : /* no outputs */ : "r"(wt->tls));
|
|
#elif defined(__x86_64__)
|
|
sys_set_tls(AMD64_SET_GSBASE, wt->tls);
|
|
#endif
|
|
atomic_init(wt->ctid, wt->tid64);
|
|
atomic_init(wt->ptid, wt->tid64);
|
|
wt->func(wt->arg);
|
|
// we no longer use the stack after this point
|
|
// void thr_exit(%rdi = long *state);
|
|
#ifdef __x86_64__
|
|
asm volatile("movl\t$0,%0\n\t" // *wt->ctid = 0
|
|
"syscall\n\t" // _umtx_op(wt->ctid, WAKE, INT_MAX)
|
|
"movl\t$431,%%eax\n\t" // thr_exit(long *nonzeroes_and_wake)
|
|
"xor\t%%edi,%%edi\n\t" // sad we can't use this free futex op
|
|
"syscall\n\t" // thr_exit() fails if thread is orphaned
|
|
"movl\t$1,%%eax\n\t" // _exit()
|
|
"syscall" //
|
|
: "=m"(*wt->ctid)
|
|
: "a"(454), "D"(wt->ctid), "S"(UMTX_OP_WAKE), "d"(INT_MAX)
|
|
: "rcx", "r8", "r9", "r10", "r11", "memory");
|
|
#elif defined(__aarch64__)
|
|
register long x0 asm("x0") = (long)wt->ctid;
|
|
register long x1 asm("x1") = UMTX_OP_WAKE;
|
|
register long x2 asm("x2") = INT_MAX;
|
|
register long x8 asm("x8") = 454; // _umtx_op
|
|
asm volatile("str\twzr,%0\n\t" // *wt->ctid = 0
|
|
"svc\t0\n\t" // _umtx_op(wt->ctid, WAKE, INT_MAX)
|
|
"mov\tx0,#0\n\t" // arg0 = 0
|
|
"mov\tx8,#431\n\t" // thr_exit
|
|
"svc\t0\n\t" // thr_exit(long *nonzeroes_and_wake = 0)
|
|
"mov\tx8,#1\n\t" // _exit
|
|
"svc\t0" // _exit(long *nonzeroes_and_wake = 0)
|
|
: "=m"(*wt->ctid)
|
|
: "r"(x0), "r"(x1), "r"(x2), "r"(x8));
|
|
#else
|
|
#error "unsupported architecture"
|
|
#endif
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
static errno_t CloneFreebsd(int (*func)(void *), char *stk, size_t stksz,
|
|
void *arg, void *tls, atomic_int *ptid,
|
|
atomic_int *ctid) {
|
|
long sp;
|
|
int64_t tid64;
|
|
struct CloneArgs *wt;
|
|
sp = (intptr_t)stk + stksz;
|
|
sp -= sizeof(struct CloneArgs);
|
|
sp &= -alignof(struct CloneArgs);
|
|
wt = (struct CloneArgs *)sp;
|
|
sp &= -16;
|
|
wt->ctid = ctid;
|
|
wt->ptid = ptid;
|
|
wt->tls = tls;
|
|
wt->func = func;
|
|
wt->arg = arg;
|
|
struct thr_param params = {
|
|
.start_func = FreebsdThreadMain,
|
|
.arg = wt,
|
|
.stack_base = stk,
|
|
.stack_size = sp - (long)stk,
|
|
.tls_base = tls,
|
|
.tls_size = 64,
|
|
.child_tid = &wt->tid64,
|
|
.parent_tid = &tid64,
|
|
};
|
|
#ifdef __x86_64__
|
|
int ax;
|
|
bool failed;
|
|
asm volatile(CFLAG_ASM("syscall")
|
|
: CFLAG_CONSTRAINT(failed), "=a"(ax)
|
|
: "1"(__NR_thr_new), "D"(¶ms), "S"(sizeof(params))
|
|
: "rcx", "rdx", "r8", "r9", "r10", "r11", "memory");
|
|
if (failed)
|
|
return ax;
|
|
#elif defined(__aarch64__)
|
|
register long x0 asm("x0") = (long)¶ms;
|
|
register long x1 asm("x1") = sizeof(params);
|
|
register int x8 asm("x8") = 0x1c7; // thr_new
|
|
asm volatile("svc\t0" : "+r"(x0) : "r"(x1), "r"(x8) : "memory");
|
|
if (x0)
|
|
return x0;
|
|
#else
|
|
#error "unsupported architecture"
|
|
#endif
|
|
atomic_init(ptid, tid64);
|
|
return 0;
|
|
}
|
|
|
|
#ifdef __aarch64__
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// APPLE SILICON
|
|
|
|
dontinstrument static void *SiliconThreadMain(void *arg) {
|
|
struct CloneArgs *wt = arg;
|
|
atomic_int *ctid = wt->ctid;
|
|
int tid = atomic_load_explicit(ctid, memory_order_relaxed);
|
|
asm volatile("mov\tx28,%0" : /* no outputs */ : "r"(wt->tls));
|
|
__stack_call(wt->arg, tid, 0, 0, wt->func, wt->sp);
|
|
atomic_store_explicit(ctid, 0, memory_order_release);
|
|
ulock_wake(UL_COMPARE_AND_WAIT | ULF_WAKE_ALL, ctid, 0);
|
|
return 0;
|
|
}
|
|
|
|
static errno_t CloneSilicon(int (*fn)(void *), char *stk, size_t stksz,
|
|
void *arg, void *tls, atomic_int *ptid,
|
|
atomic_int *ctid) {
|
|
|
|
// assign tid to new thread
|
|
static atomic_uint tids;
|
|
unsigned tid = atomic_fetch_add_explicit(&tids, 1, memory_order_relaxed);
|
|
tid %= kMaxThreadIds;
|
|
tid += kMinThreadId;
|
|
atomic_init(ctid, tid);
|
|
atomic_init(ptid, tid);
|
|
|
|
// pass temp data on stack
|
|
intptr_t sp, tip;
|
|
struct CloneArgs *wt;
|
|
sp = tip = (intptr_t)stk + stksz;
|
|
sp -= sizeof(struct CloneArgs);
|
|
sp &= -alignof(struct CloneArgs);
|
|
wt = (struct CloneArgs *)sp;
|
|
wt->func = fn;
|
|
wt->arg = arg;
|
|
wt->tls = tls;
|
|
wt->ctid = ctid;
|
|
wt->sp = tip & -16;
|
|
|
|
// ask apple libc to spawn thread
|
|
errno_t res;
|
|
pthread_t th;
|
|
size_t babystack = __syslib->__pthread_stack_min;
|
|
#pragma GCC push_options
|
|
#pragma GCC diagnostic ignored "-Walloca-larger-than="
|
|
void *attr = alloca(__syslib->__sizeof_pthread_attr_t);
|
|
#pragma GCC pop_options
|
|
__syslib->__pthread_attr_init(attr);
|
|
__syslib->__pthread_attr_setguardsize(attr, 0);
|
|
__syslib->__pthread_attr_setstacksize(attr, babystack);
|
|
if (!(res = __syslib->__pthread_create(&th, attr, SiliconThreadMain, wt))) {
|
|
atomic_init(ptid, tid);
|
|
struct CosmoTib *tib = tls;
|
|
atomic_store_explicit(&tib[-1].tib_syshand, th, memory_order_release);
|
|
}
|
|
__syslib->__pthread_attr_destroy(attr);
|
|
return res;
|
|
}
|
|
|
|
#endif /* __aarch64__ */
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// GNU/SYSTEMD
|
|
|
|
/**
 * Parameters CloneLinux() places on the new thread's stack on x86-64, so
 * that AmdLinuxThreadEntry() can install TLS before the user's function
 * runs.
 */
struct LinuxCloneArgs {
  int (*func)(void *);  // the caller's thread function
  void *arg;            // argument passed to func
  char *tls;            // TLS base given to sys_set_tls()
};
|
|
|
|
// Defined elsewhere. CloneLinux() treats a negative result as a negated
// error code. The trailing comments give where each argument is passed
// on x86-64.
int sys_clone_linux(int flags,         // rdi
                    long sp,           // rsi
                    atomic_int *ptid,  // rdx
                    atomic_int *ctid,  // rcx
                    void *tls,         // r8
                    void *func,        // r9
                    void *arg);        // 8(rsp)
|
|
|
|
dontinstrument static int AmdLinuxThreadEntry(void *arg) {
|
|
struct LinuxCloneArgs *wt = arg;
|
|
#if defined(__x86_64__)
|
|
sys_set_tls(ARCH_SET_GS, wt->tls);
|
|
#endif
|
|
return wt->func(wt->arg);
|
|
}
|
|
|
|
static int CloneLinux(int (*func)(void *), char *stk, size_t stksz, int flags,
|
|
void *arg, void *tls, atomic_int *ptid,
|
|
atomic_int *ctid) {
|
|
long sp = (intptr_t)stk + stksz;
|
|
|
|
#if defined(__x86_64__)
|
|
sp -= sizeof(struct LinuxCloneArgs);
|
|
sp &= -alignof(struct LinuxCloneArgs);
|
|
struct LinuxCloneArgs *wt = (struct LinuxCloneArgs *)sp;
|
|
sp &= -16; // align the stack
|
|
wt->arg = arg;
|
|
wt->tls = tls;
|
|
wt->func = func;
|
|
func = AmdLinuxThreadEntry;
|
|
arg = wt;
|
|
#elif defined(__aarch64__)
|
|
sp &= -128; // for kernels <=4.6
|
|
#endif
|
|
|
|
int rc;
|
|
if ((rc = sys_clone_linux(flags, sp, ptid, ctid, tls, func, arg)) >= 0) {
|
|
// clone() is documented as setting ptid before return
|
|
return 0;
|
|
} else {
|
|
return -rc;
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// COSMOPOLITAN
|
|
|
|
/**
|
|
* Creates thread without malloc() being linked.
|
|
*
|
|
* If you use clone() you're on your own.
|
|
*/
|
|
errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg,
|
|
void *ptid, void *tls, void *ctid) {
|
|
errno_t err;
|
|
|
|
atomic_fetch_add(&_pthread_count, 1);
|
|
|
|
if (IsLinux()) {
|
|
err = CloneLinux(func, stk, stksz, flags, arg, tls, ptid, ctid);
|
|
} else if (IsXnu()) {
|
|
#if defined(__x86_64__)
|
|
err = CloneXnu(func, stk, stksz, arg, tls, ptid, ctid);
|
|
#elif defined(__aarch64__)
|
|
err = CloneSilicon(func, stk, stksz, arg, tls, ptid, ctid);
|
|
#else
|
|
#error "unsupported architecture"
|
|
#endif
|
|
} else if (IsFreebsd()) {
|
|
err = CloneFreebsd(func, stk, stksz, arg, tls, ptid, ctid);
|
|
#if defined(__x86_64__)
|
|
} else if (IsWindows()) {
|
|
err = CloneWindows(func, stk, stksz, arg, tls, ptid, ctid);
|
|
} else if (IsNetbsd()) {
|
|
err = CloneNetbsd(func, stk, stksz, arg, tls, ptid, ctid);
|
|
} else if (IsOpenbsd()) {
|
|
err = CloneOpenbsd(func, stk, stksz, arg, tls, ptid, ctid);
|
|
#endif /* __x86_64__ */
|
|
} else {
|
|
err = ENOSYS;
|
|
}
|
|
|
|
if (SupportsBsd() && err == EPROCLIM)
|
|
err = EAGAIN;
|
|
|
|
if (err)
|
|
atomic_fetch_sub(&_pthread_count, 1);
|
|
|
|
return err;
|
|
}
|