mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-30 01:02:29 +00:00
Get threads working well on MacOS Arm64
- Now using 10x better GCD semaphores
- We now generate Linux-like thread ids
- We now use fast system clock / sleep libraries
- The APE M1 loader now generates Linux-like stacks
This commit is contained in:
parent
b5eab2b0b7
commit
bcf9af94bf
2037 changed files with 4664 additions and 4451 deletions
|
@ -27,7 +27,6 @@
|
|||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/intrin/atomic.h"
|
||||
#include "libc/intrin/describeflags.internal.h"
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.internal.h"
|
||||
|
@ -53,6 +52,9 @@
|
|||
#include "libc/thread/tls2.h"
|
||||
#include "libc/thread/xnu.internal.h"
|
||||
|
||||
#define kMaxThreadIds 32768
|
||||
#define kMinThreadId 262144
|
||||
|
||||
#define __NR_thr_new 455
|
||||
#define __NR_clone_linux 56
|
||||
#define __NR__lwp_create 309
|
||||
|
@ -64,8 +66,11 @@
|
|||
#define LWP_SUSPENDED 0x00000080
|
||||
|
||||
struct CloneArgs {
|
||||
union {
|
||||
int tid;
|
||||
_Alignas(16) union {
|
||||
struct {
|
||||
int tid;
|
||||
int this;
|
||||
};
|
||||
uint32_t utid;
|
||||
int64_t tid64;
|
||||
};
|
||||
|
@ -77,6 +82,12 @@ struct CloneArgs {
|
|||
void *arg;
|
||||
};
|
||||
|
||||
// Locates the CloneArgs record at the top of a caller-supplied stack.
//
// Takes the end address of the region (`stk + stksz`), steps back by
// `sizeof(struct CloneArgs)`, and rounds the result down to a 16-byte
// boundary (`& -16`). Nothing is written; only the address is computed.
// No check is made here that `stksz` is large enough to hold the
// record, so that is left to the caller.
//
// @param stk is the lowest address of the stack memory
// @param stksz is the size of that memory in bytes
// @return 16-byte aligned pointer just below the end of the region
static struct CloneArgs *AllocateCloneArgs(char *stk, size_t stksz) {
|
||||
return (struct CloneArgs *)(((uintptr_t)(stk + stksz) -
|
||||
sizeof(struct CloneArgs)) &
|
||||
-16);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -108,7 +119,7 @@ WinThreadEntry(int rdi, // rcx
|
|||
int rc;
|
||||
if (wt->tls) __set_tls_win32(wt->tls);
|
||||
*wt->ctid = wt->tid;
|
||||
rc = WinThreadLaunch(wt->arg, wt->tid, wt->func, (intptr_t)wt & -16);
|
||||
rc = WinThreadLaunch(wt->arg, wt->tid, wt->func, (intptr_t)wt);
|
||||
// we can now clear ctid directly since we're no longer using our own
|
||||
// stack memory, which can now be safely free'd by the parent thread.
|
||||
*wt->ztid = 0;
|
||||
|
@ -124,9 +135,7 @@ static textwindows errno_t CloneWindows(int (*func)(void *, int), char *stk,
|
|||
void *tls, int *ptid, int *ctid) {
|
||||
int64_t h;
|
||||
struct CloneArgs *wt;
|
||||
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
|
||||
sizeof(struct CloneArgs)) &
|
||||
-alignof(struct CloneArgs));
|
||||
wt = AllocateCloneArgs(stk, stksz);
|
||||
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
|
||||
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
|
||||
wt->func = func;
|
||||
|
@ -155,7 +164,6 @@ void XnuThreadThunk(void *pthread, // rdi x0
|
|||
asm("XnuThreadThunk:\n\t"
|
||||
"xor\t%ebp,%ebp\n\t"
|
||||
"mov\t%r8,%rsp\n\t"
|
||||
"and\t$-16,%rsp\n\t"
|
||||
"push\t%rax\n\t"
|
||||
"jmp\tXnuThreadMain\n\t"
|
||||
".size\tXnuThreadThunk,.-XnuThreadThunk");
|
||||
|
@ -209,9 +217,7 @@ static errno_t CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
|
|||
_npassert(sys_bsdthread_register(XnuThreadThunk, 0, 0, 0, 0, 0, 0) != -1);
|
||||
once = true;
|
||||
}
|
||||
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
|
||||
sizeof(struct CloneArgs)) &
|
||||
-alignof(struct CloneArgs));
|
||||
wt = AllocateCloneArgs(stk, stksz);
|
||||
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
|
||||
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
|
||||
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
|
||||
|
@ -248,9 +254,7 @@ static errno_t CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz,
|
|||
bool failed;
|
||||
int64_t tid;
|
||||
struct CloneArgs *wt;
|
||||
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
|
||||
sizeof(struct CloneArgs)) &
|
||||
-alignof(struct CloneArgs));
|
||||
wt = AllocateCloneArgs(stk, stksz);
|
||||
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
|
||||
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
|
||||
wt->tls = tls;
|
||||
|
@ -260,7 +264,7 @@ static errno_t CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz,
|
|||
.start_func = FreebsdThreadMain,
|
||||
.arg = wt,
|
||||
.stack_base = stk,
|
||||
.stack_size = (((intptr_t)wt - (intptr_t)stk) & -16) - 8,
|
||||
.stack_size = (uintptr_t)wt - (uintptr_t)stk,
|
||||
.tls_base = flags & CLONE_SETTLS ? tls : 0,
|
||||
.tls_size = 64,
|
||||
.child_tid = &wt->tid64,
|
||||
|
@ -346,8 +350,7 @@ static wontreturn void NetbsdThreadMain(void *arg, // rdi
|
|||
// we no longer use the stack after this point
|
||||
// %eax = int __lwp_exit(void);
|
||||
asm volatile("movl\t$0,%2\n\t" // *wt->ztid = 0
|
||||
"syscall\n\t" // __lwp_exit()
|
||||
"ud2"
|
||||
"syscall" // __lwp_exit()
|
||||
: "=a"(ax), "=d"(dx), "=m"(*ztid)
|
||||
: "0"(310)
|
||||
: "rcx", "r11", "memory");
|
||||
|
@ -440,20 +443,18 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
|
|||
static void *SiliconThreadMain(void *arg) {
|
||||
register struct CloneArgs *wt asm("x21") = arg;
|
||||
asm volatile("ldr\tx28,%0" : /* no outputs */ : "m"(wt->tls));
|
||||
int tid = sys_gettid();
|
||||
*wt->ctid = tid;
|
||||
*wt->ptid = tid;
|
||||
*wt->ctid = wt->this;
|
||||
register long x0 asm("x0") = (long)wt->arg;
|
||||
register long x1 asm("x1") = (long)tid;
|
||||
register long x1 asm("x1") = (long)wt->tid;
|
||||
asm volatile("mov\tx19,x29\n\t" // save frame pointer
|
||||
"mov\tx20,sp\n\t" // save stack pointer
|
||||
"mov\tx29,#0\n\t" // reset backtrace
|
||||
"mov\tsp,x21\n\t" // switch stack
|
||||
"mov\tsp,%3\n\t" // switch stack
|
||||
"blr\t%2\n\t" // wt->func(wt->arg, tid)
|
||||
"mov\tx29,x19\n\t" // restore frame pointer
|
||||
"mov\tsp,x20" // restore stack pointer
|
||||
: "+r"(x0)
|
||||
: "r"(x1), "r"(wt->func)
|
||||
: "r"(x1), "r"(wt->func), "r"(wt)
|
||||
: "x19", "x20", "memory");
|
||||
*wt->ztid = 0;
|
||||
return 0;
|
||||
|
@ -462,18 +463,24 @@ static void *SiliconThreadMain(void *arg) {
|
|||
static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz,
|
||||
int flags, void *arg, void *tls, int *ptid,
|
||||
int *ctid) {
|
||||
errno_t res;
|
||||
unsigned tid;
|
||||
pthread_t th;
|
||||
struct CloneArgs *wt;
|
||||
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
|
||||
sizeof(struct CloneArgs)) &
|
||||
-MAX(16, alignof(struct CloneArgs)));
|
||||
static atomic_uint tids;
|
||||
wt = AllocateCloneArgs(stk, stksz);
|
||||
tid = atomic_fetch_add_explicit(&tids, 1, memory_order_acq_rel);
|
||||
wt->this = tid = (tid & (kMaxThreadIds - 1)) + kMinThreadId;
|
||||
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
|
||||
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
|
||||
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
|
||||
wt->tls = flags & CLONE_SETTLS ? tls : 0;
|
||||
wt->func = fn;
|
||||
wt->arg = arg;
|
||||
return __syslib->pthread_create(&th, 0, SiliconThreadMain, wt);
|
||||
if (!(res = __syslib->pthread_create(&th, 0, SiliconThreadMain, wt)) &&
|
||||
(flags & CLONE_PARENT_SETTID)) {
|
||||
*ptid = tid;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
#endif /* __aarch64__ */
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/syscall-sysv.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
|
@ -27,11 +28,16 @@
|
|||
#include "libc/runtime/stack.h"
|
||||
#include "libc/runtime/syslib.internal.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
#include "libc/sysv/consts/sig.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "libc/thread/tls.h"
|
||||
#ifndef __x86_64__
|
||||
|
||||
void __wipe(uintptr_t);
|
||||
/**
|
||||
* @fileoverview Cosmopolitan C Runtime, Second Edition
|
||||
*/
|
||||
|
||||
void __wipe(uintptr_t) _Hide;
|
||||
int main(int, char **, char **) __attribute__((__weak__));
|
||||
|
||||
typedef int init_f(int argc, char **argv, char **envp, unsigned long *auxv);
|
||||
|
@ -75,12 +81,14 @@ textstartup void cosmo(long *sp, struct Syslib *m1) {
|
|||
while (*auxv++) donothing;
|
||||
|
||||
// detect apple m1 environment
|
||||
if ((__syslib = m1)) {
|
||||
if (SupportsXnu() && (__syslib = m1)) {
|
||||
hostos = _HOSTXNU;
|
||||
magnums = syscon_xnu;
|
||||
} else {
|
||||
} else if (SupportsLinux()) {
|
||||
hostos = _HOSTLINUX;
|
||||
magnums = syscon_linux;
|
||||
} else {
|
||||
notpossible;
|
||||
}
|
||||
|
||||
// setup system magic numbers
|
||||
|
@ -88,6 +96,18 @@ textstartup void cosmo(long *sp, struct Syslib *m1) {
|
|||
*mp = *magnums++;
|
||||
}
|
||||
|
||||
// check system call abi compatibility
|
||||
if (SupportsXnu() && __syslib && __syslib->version < SYSLIB_VERSION) {
|
||||
sys_write(2, "need newer ape loader\n", 22);
|
||||
_Exit(127);
|
||||
}
|
||||
|
||||
// disable enosys trapping
|
||||
if (IsBsd()) {
|
||||
void *act[6] = {SIG_IGN};
|
||||
sys_sigaction(SIGSYS, act, 0, 8, 0);
|
||||
}
|
||||
|
||||
// needed by kisdangerous()
|
||||
__oldstack = (intptr_t)sp;
|
||||
__pid = sys_getpid().ax;
|
||||
|
@ -97,7 +117,7 @@ textstartup void cosmo(long *sp, struct Syslib *m1) {
|
|||
_mmi.p = _mmi.s;
|
||||
__mmi_lock_obj._type = PTHREAD_MUTEX_RECURSIVE;
|
||||
|
||||
// record system-provided stack to memory manager
|
||||
// record provided stack to memory manager
|
||||
_mmi.i = 1;
|
||||
_mmi.p->x = (uintptr_t)GetStackAddr() >> 16;
|
||||
_mmi.p->y = (uintptr_t)(GetStackAddr() + (GetStackSize() - FRAMESIZE)) >> 16;
|
||||
|
@ -106,6 +126,7 @@ textstartup void cosmo(long *sp, struct Syslib *m1) {
|
|||
|
||||
#if 0
|
||||
#if IsAsan()
|
||||
// TODO(jart): Figure out ASAN data model on AARCH64.
|
||||
__asan_init(argc, argv, envp, auxv);
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -66,5 +66,5 @@ void __enable_threads(void) {
|
|||
STRACE("__enable_threads()");
|
||||
FixupLockNops();
|
||||
#endif
|
||||
__threaded = sys_gettid();
|
||||
__threaded = __tls_enabled ? __get_tls()->tib_tid : sys_gettid();
|
||||
}
|
||||
|
|
|
@ -190,7 +190,7 @@ textstartup void __enable_tls(void) {
|
|||
tib->tib_strace = __strace;
|
||||
tib->tib_ftrace = __ftrace;
|
||||
tib->tib_pthread = (pthread_t)&_pthread_main;
|
||||
if (IsLinux()) {
|
||||
if (IsLinux() || IsXnuSilicon()) {
|
||||
// gnu/systemd guarantees pid==tid for the main thread so we can
|
||||
// avoid issuing a superfluous system call at startup in program
|
||||
tid = __pid;
|
||||
|
|
|
@ -39,28 +39,29 @@ int sys_fork(void) {
|
|||
|
||||
#elif defined(__aarch64__)
|
||||
|
||||
int flags = 17; // SIGCHLD
|
||||
void *child_stack = 0;
|
||||
void *parent_tidptr = 0;
|
||||
void *newtls = 0;
|
||||
void *child_tidptr = 0;
|
||||
register long r0 asm("x0") = (long)flags;
|
||||
register long r1 asm("x1") = (long)child_stack;
|
||||
register long r2 asm("x2") = (long)parent_tidptr;
|
||||
register long r3 asm("x3") = (long)newtls;
|
||||
register long r4 asm("x4") = (long)child_tidptr;
|
||||
register int res_x0 asm("x0");
|
||||
register int res_x1 asm("x1");
|
||||
asm volatile("mov\tx8,%2\n\t"
|
||||
"mov\tx16,%3\n\t"
|
||||
"svc\t0"
|
||||
: "=r"(res_x0), "=r"(res_x1)
|
||||
: "i"(220), "i"(2), "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4)
|
||||
: "x8", "x16", "memory");
|
||||
if (IsXnu() && res_x0 != -1) {
|
||||
res_x0 &= res_x1 - 1;
|
||||
if (IsLinux()) {
|
||||
int flags = 17; // SIGCHLD
|
||||
void *child_stack = 0;
|
||||
void *parent_tidptr = 0;
|
||||
void *newtls = 0;
|
||||
void *child_tidptr = 0;
|
||||
register long r0 asm("x0") = (long)flags;
|
||||
register long r1 asm("x1") = (long)child_stack;
|
||||
register long r2 asm("x2") = (long)parent_tidptr;
|
||||
register long r3 asm("x3") = (long)newtls;
|
||||
register long r4 asm("x4") = (long)child_tidptr;
|
||||
register int res_x0 asm("x0");
|
||||
asm volatile("mov\tx8,%1\n\t"
|
||||
"svc\t0"
|
||||
: "=r"(res_x0)
|
||||
: "i"(220), "r"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4)
|
||||
: "x8", "x16", "memory");
|
||||
return _sysret(res_x0);
|
||||
} else if (__syslib) {
|
||||
return _sysret(__syslib->fork());
|
||||
} else {
|
||||
return enosys();
|
||||
}
|
||||
return _sysret(res_x0);
|
||||
|
||||
#else
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ int _fork(uint32_t dwCreationFlags) {
|
|||
__pid = dx;
|
||||
if (__tls_enabled) {
|
||||
tib = __get_tls();
|
||||
tid = IsLinux() ? dx : sys_gettid();
|
||||
tid = IsLinux() || IsXnuSilicon() ? dx : sys_gettid();
|
||||
atomic_store_explicit(&tib->tib_tid, tid, memory_order_relaxed);
|
||||
if ((pt = (struct PosixThread *)tib->tib_pthread)) {
|
||||
atomic_store_explicit(&pt->ptid, tid, memory_order_relaxed);
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_RUNTIME_SYSLIB_H_
|
||||
#define COSMOPOLITAN_LIBC_RUNTIME_SYSLIB_H_
|
||||
#include "libc/calls/struct/iovec.h"
|
||||
#include "libc/calls/struct/sigaction.h"
|
||||
#include "libc/calls/struct/sigset.h"
|
||||
#include "libc/calls/struct/timespec.h"
|
||||
|
@ -8,43 +7,41 @@
|
|||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
/**
|
||||
* @fileoverview System DSO interfaces provided by APE loader.
|
||||
*
|
||||
* These functions are owned by the platform C library. Regardless of
|
||||
* platform, POSIX APIs returning `long` will follow the Linux Kernel
|
||||
* `-errno` convention, and hence should be wrapped with `_sysret()`.
|
||||
*/
|
||||
|
||||
#define SYSLIB_MAGIC ('s' | 'l' << 8 | 'i' << 16 | 'b' << 24)
|
||||
#define SYSLIB_VERSION 0
|
||||
#define SYSLIB_VERSION 1
|
||||
|
||||
typedef uint64_t dispatch_time_t;
|
||||
typedef uint64_t dispatch_semaphore_t;
|
||||
|
||||
struct Syslib {
|
||||
int magic;
|
||||
int version;
|
||||
void (*exit)(int) wontreturn;
|
||||
long (*fork)(void);
|
||||
long (*read)(int, void *, size_t);
|
||||
long (*pread)(int, void *, size_t, int64_t);
|
||||
long (*readv)(int, const struct iovec *, int);
|
||||
long (*write)(int, const void *, size_t);
|
||||
long (*pwrite)(int, const void *, size_t, int64_t);
|
||||
long (*writev)(int, const struct iovec *, int);
|
||||
long (*openat)(int, const char *, int, ...);
|
||||
long (*pipe)(int[2]);
|
||||
long (*close)(int);
|
||||
long (*clock_gettime)(int, struct timespec *);
|
||||
long (*nanosleep)(const struct timespec *, struct timespec *);
|
||||
long (*mmap)(void *, size_t, int, int, int, int64_t);
|
||||
long (*sigaction)(int, const struct sigaction *restrict,
|
||||
struct sigaction *restrict);
|
||||
int (*pthread_jit_write_protect_supported_np)(void);
|
||||
void (*pthread_jit_write_protect_np)(int);
|
||||
void (*sys_icache_invalidate)(void *, size_t);
|
||||
pthread_t (*pthread_self)(void);
|
||||
int (*pthread_create)(pthread_t *, const pthread_attr_t *, void *(*)(void *),
|
||||
void *);
|
||||
int (*pthread_detach)(pthread_t);
|
||||
int (*pthread_join)(pthread_t, void **);
|
||||
void (*pthread_exit)(void *);
|
||||
int (*pthread_kill)(pthread_t, int);
|
||||
int (*pthread_sigmask)(int, const sigset_t *restrict, sigset_t *restrict);
|
||||
int (*pthread_setname_np)(const char *);
|
||||
int (*pthread_key_create)(pthread_key_t *, void (*)(void *));
|
||||
int (*pthread_setspecific)(pthread_key_t, const void *);
|
||||
void *(*pthread_getspecific)(pthread_key_t);
|
||||
dispatch_semaphore_t (*dispatch_semaphore_create)(long);
|
||||
long (*dispatch_semaphore_signal)(dispatch_semaphore_t);
|
||||
long (*dispatch_semaphore_wait)(dispatch_semaphore_t, dispatch_time_t);
|
||||
dispatch_time_t (*dispatch_walltime)(const struct timespec *, int64_t);
|
||||
};
|
||||
|
||||
extern struct Syslib *__syslib;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue