mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-07 11:48:30 +00:00
Make _Thread_local work across platforms
We now rewrite the binary image at runtime on Windows and XNU to change mov %fs:0,%reg instructions to use %gs instead. There's also simpler threading API introduced by this change and it's called _spawn() and _join(), which has replaced most clone() usage.
This commit is contained in:
parent
e4d6e263d4
commit
5f4f6b0e69
51 changed files with 808 additions and 1043 deletions
|
@ -67,6 +67,7 @@ int chdir(const char *);
|
|||
int chmod(const char *, uint32_t);
|
||||
int chown(const char *, uint32_t, uint32_t);
|
||||
int chroot(const char *);
|
||||
int clone(void *, void *, size_t, int, void *, int *, void *, size_t, int *);
|
||||
int close(int);
|
||||
int creat(const char *, uint32_t);
|
||||
int dup(int);
|
||||
|
@ -196,9 +197,6 @@ ssize_t splice(int, int64_t *, int, int64_t *, size_t, uint32_t);
|
|||
ssize_t write(int, const void *, size_t);
|
||||
void sync(void);
|
||||
|
||||
int clone(int (*)(void *), void *, size_t, int, void *, int *, void *, size_t,
|
||||
int *);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_CALLS_SYSCALLS_H_ */
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/calls/strace.internal.h"
|
||||
#include "libc/calls/struct/timespec.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/fmt/itoa.h"
|
||||
#include "libc/intrin/describeflags.internal.h"
|
||||
#include "libc/intrin/futex.internal.h"
|
||||
|
|
|
@ -108,7 +108,6 @@ o/$(MODE)/libc/intrin/describeprotflags.o: \
|
|||
OVERRIDE_CFLAGS += \
|
||||
-fno-sanitize=address
|
||||
|
||||
o/$(MODE)/libc/intrin/tls.greg.o \
|
||||
o/$(MODE)/libc/intrin/exit.greg.o \
|
||||
o/$(MODE)/libc/intrin/exit1.greg.o \
|
||||
o/$(MODE)/libc/intrin/getenv.greg.o \
|
||||
|
|
|
@ -1,125 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/nexgen32e/threaded.h"
|
||||
#include "libc/nt/thread.h"
|
||||
#include "libc/nt/thunk/msabi.h"
|
||||
#include "libc/sysv/consts/nrlinux.h"
|
||||
|
||||
#define __NR_sysarch 0x000000a5 // freebsd+netbsd
|
||||
#define AMD64_SET_GSBASE 131 // freebsd
|
||||
#define AMD64_SET_FSBASE 129 // freebsd
|
||||
#define X86_SET_GSBASE 16 // netbsd
|
||||
#define X86_SET_FSBASE 17 // netbsd
|
||||
|
||||
#define __NR___set_tcb 0x00000149
|
||||
#define __NR__lwp_setprivate 0x0000013d
|
||||
#define __NR_thread_fast_set_cthread_self 0x03000003
|
||||
|
||||
/**
|
||||
* Initializes thread information block.
|
||||
*
|
||||
* Here's the layout your c library assumes:
|
||||
*
|
||||
* offset size description
|
||||
* 0x0000 0x08 linear address pointer
|
||||
* 0x0030 0x08 linear address pointer
|
||||
* 0x0038 0x04 tid
|
||||
* 0x003c 0x04 errno
|
||||
*
|
||||
*/
|
||||
privileged void *__initialize_tls(char tib[64]) {
|
||||
if (tib) {
|
||||
*(intptr_t *)(tib + 0x00) = (intptr_t)tib;
|
||||
*(intptr_t *)(tib + 0x30) = (intptr_t)tib;
|
||||
*(int *)(tib + 0x38) = -1; // tid
|
||||
*(int *)(tib + 0x3c) = 0;
|
||||
}
|
||||
return tib;
|
||||
}
|
||||
|
||||
/**
|
||||
* Installs thread information block on main process.
|
||||
*
|
||||
* For example, to set up TLS correctly for the main thread, without
|
||||
* creating any threads, then it's sufficient to say:
|
||||
*
|
||||
* __attribute__((__constructor__)) static void InitTls(void) {
|
||||
* static char tls[64];
|
||||
* __initialize_tls(tls);
|
||||
* *(int *)(tls + 0x38) = gettid();
|
||||
* *(int *)(tls + 0x3c) = __errno;
|
||||
* __install_tls(tls);
|
||||
* }
|
||||
*
|
||||
* We use a constructor here to make sure it only happens once. Please
|
||||
* note that calling `clone` will do this automatically.
|
||||
*
|
||||
* Installing TLS causes the `__tls_enabled` variable to be set. This
|
||||
* causes C library features such as `errno` and `gettid()` to use TLS.
|
||||
* This can help things like recursive mutexes go significantly faster.
|
||||
*
|
||||
* To access your TLS storage, you can call `__get_tls()` or
|
||||
* __get_tls_inline()` which return the address of the `tib`.
|
||||
*
|
||||
* @param tib is your thread information block, which must have at least
|
||||
* 64 bytes on the righthand side of the tib pointer since those are
|
||||
* the values your C library reserves for itself. memory on the left
|
||||
* side of the pointer is reserved by the linker for _Thread_local.
|
||||
*/
|
||||
privileged void __install_tls(char tib[64]) {
|
||||
int ax, dx;
|
||||
assert(tib);
|
||||
assert(!__tls_enabled);
|
||||
assert(*(int *)(tib + 0x38) != -1);
|
||||
if (IsWindows()) {
|
||||
__tls_index = TlsAlloc();
|
||||
asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib));
|
||||
} else if (IsFreebsd()) {
|
||||
asm volatile("syscall"
|
||||
: "=a"(ax)
|
||||
: "0"(__NR_sysarch), "D"(AMD64_SET_FSBASE), "S"(tib)
|
||||
: "rcx", "r11", "memory", "cc");
|
||||
} else if (IsNetbsd()) {
|
||||
asm volatile("syscall"
|
||||
: "=a"(ax), "=d"(dx)
|
||||
: "0"(__NR_sysarch), "D"(X86_SET_FSBASE), "S"(tib)
|
||||
: "rcx", "r11", "memory", "cc");
|
||||
} else if (IsXnu()) {
|
||||
asm volatile("syscall"
|
||||
: "=a"(ax)
|
||||
: "0"(__NR_thread_fast_set_cthread_self),
|
||||
"D"((intptr_t)tib - 0x30)
|
||||
: "rcx", "r11", "memory", "cc");
|
||||
} else if (IsOpenbsd()) {
|
||||
asm volatile("syscall"
|
||||
: "=a"(ax)
|
||||
: "0"(__NR___set_tcb), "D"(tib)
|
||||
: "rcx", "r11", "memory", "cc");
|
||||
} else {
|
||||
asm volatile("syscall"
|
||||
: "=a"(ax)
|
||||
: "0"(__NR_linux_arch_prctl), "D"(ARCH_SET_FS), "S"(tib)
|
||||
: "rcx", "r11", "memory");
|
||||
}
|
||||
__tls_enabled = true;
|
||||
}
|
|
@ -16,10 +16,10 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/atomic.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/futex.internal.h"
|
||||
#include "libc/intrin/wait0.internal.h"
|
||||
#include "libc/linux/futex.h"
|
||||
|
@ -31,13 +31,13 @@
|
|||
* by the clone() system call when a thread terminates. The purpose of
|
||||
* this operation is to know when it's safe to munmap() a thread stack.
|
||||
*/
|
||||
void _wait0(const int *ptid) {
|
||||
void _wait0(const int *ctid) {
|
||||
int x;
|
||||
for (;;) {
|
||||
if (!(x = atomic_load_explicit(ptid, memory_order_acquire))) {
|
||||
if (!(x = atomic_load_explicit(ctid, memory_order_acquire))) {
|
||||
break;
|
||||
} else if (IsLinux() /* || IsOpenbsd() */) {
|
||||
_futex_wait(ptid, x, &(struct timespec){2});
|
||||
_futex_wait(ctid, x, &(struct timespec){2});
|
||||
} else {
|
||||
sched_yield();
|
||||
}
|
||||
|
|
|
@ -245,9 +245,12 @@ static wontreturn relegated noinstrument void __minicrash(int sig,
|
|||
"RIP %x\n"
|
||||
"RSP %x\n"
|
||||
"RBP %x\n"
|
||||
"PID %d\n"
|
||||
"TID %d\n"
|
||||
"\n",
|
||||
kind, sig, __argv[0], ctx ? ctx->uc_mcontext.rip : 0,
|
||||
ctx ? ctx->uc_mcontext.rsp : 0, ctx ? ctx->uc_mcontext.rbp : 0);
|
||||
ctx ? ctx->uc_mcontext.rsp : 0, ctx ? ctx->uc_mcontext.rbp : 0, __pid,
|
||||
sys_gettid());
|
||||
__restorewintty();
|
||||
_Exit(119);
|
||||
}
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
// @param rdx is ptid
|
||||
// @param rcx is ctid
|
||||
// @param r8 is tls
|
||||
// @param r9 is func
|
||||
// @param r9 is func(void*,int)→int
|
||||
// @param 8(rsp) is arg
|
||||
// @return tid of child on success, or -1 w/ errno
|
||||
sys_clone_linux:
|
||||
|
@ -48,8 +48,9 @@ sys_clone_linux:
|
|||
jmp 0b
|
||||
2: xor %ebp,%ebp # child thread
|
||||
mov %rbx,%rdi # arg
|
||||
call *%r9 # func(arg)
|
||||
xchg %eax,%edi # func(arg) → exitcode
|
||||
mov (%r10),%esi # tid
|
||||
call *%r9 # func(arg,tid)
|
||||
xchg %eax,%edi # func(arg,tid) → exitcode
|
||||
mov $60,%eax # __NR_exit(exitcode)
|
||||
syscall
|
||||
.endfn sys_clone_linux,globl,hidden
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/strace.internal.h"
|
||||
#include "libc/calls/struct/ucontext-netbsd.internal.h"
|
||||
|
@ -24,7 +23,6 @@
|
|||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "libc/intrin/spinlock.h"
|
||||
#include "libc/limits.h"
|
||||
#include "libc/macros.internal.h"
|
||||
|
@ -68,19 +66,23 @@ struct CloneArgs {
|
|||
};
|
||||
union {
|
||||
char lock;
|
||||
void *pstack;
|
||||
void *oldrsp;
|
||||
};
|
||||
int *ptid;
|
||||
int *ctid;
|
||||
int *ztid;
|
||||
char *tls;
|
||||
int (*func)(void *);
|
||||
int (*func)(void *, int);
|
||||
void *arg;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// THE NEW TECHNOLOGY
|
||||
|
||||
int WinThreadLaunch(void *arg, int (*func)(void *), intptr_t rsp);
|
||||
int WinThreadLaunch(void *arg, // rdi
|
||||
int tid, // rsi
|
||||
int (*func)(void *, int), // rdx
|
||||
intptr_t rsp); // rcx
|
||||
|
||||
// we can't log this function because:
|
||||
// 1. windows owns the backtrace pointer right now
|
||||
|
@ -90,16 +92,20 @@ int WinThreadLaunch(void *arg, int (*func)(void *), intptr_t rsp);
|
|||
// 2. windows owns the stack memory right now
|
||||
// we need win32 raw imports because:
|
||||
// 1. generated thunks are function logged
|
||||
noasan noinstrument static textwindows wontreturn void WinThreadEntry(
|
||||
int rdi, int rsi, int rdx, struct CloneArgs *wt) {
|
||||
noasan noinstrument static textwindows wontreturn void //
|
||||
WinThreadEntry(int rdi, // rcx
|
||||
int rsi, // rdx
|
||||
int rdx, // r8
|
||||
struct CloneArgs *wt) { // r9
|
||||
int rc;
|
||||
if (wt->tls) {
|
||||
asm("mov\t%1,%%gs:%0"
|
||||
: "=m"(*((long *)0x1480 + __tls_index))
|
||||
: "r"(wt->tls));
|
||||
}
|
||||
*wt->ptid = wt->tid;
|
||||
*wt->ctid = wt->tid;
|
||||
rc = WinThreadLaunch(wt->arg, wt->func, (intptr_t)wt & -16);
|
||||
rc = WinThreadLaunch(wt->arg, wt->tid, wt->func, (intptr_t)wt & -16);
|
||||
// we can now clear ctid directly since we're no longer using our own
|
||||
// stack memory, which can now be safely free'd by the parent thread.
|
||||
*wt->ztid = 0;
|
||||
|
@ -109,14 +115,16 @@ noasan noinstrument static textwindows wontreturn void WinThreadEntry(
|
|||
unreachable;
|
||||
}
|
||||
|
||||
static textwindows int CloneWindows(int (*func)(void *), char *stk,
|
||||
static textwindows int CloneWindows(int (*func)(void *, int), char *stk,
|
||||
size_t stksz, int flags, void *arg,
|
||||
void *tls, size_t tlssz, int *ctid) {
|
||||
void *tls, size_t tlssz, int *ptid,
|
||||
int *ctid) {
|
||||
int64_t h;
|
||||
struct CloneArgs *wt;
|
||||
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
|
||||
sizeof(struct CloneArgs)) &
|
||||
-alignof(struct CloneArgs));
|
||||
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
|
||||
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
|
||||
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
|
||||
wt->func = func;
|
||||
|
@ -133,8 +141,12 @@ static textwindows int CloneWindows(int (*func)(void *), char *stk,
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// XNU'S NOT UNIX
|
||||
|
||||
void XnuThreadThunk(void *pthread, int machport, void *(*func)(void *),
|
||||
void *arg, intptr_t *stack, unsigned xnuflags);
|
||||
void XnuThreadThunk(void *pthread, // rdi
|
||||
int machport, // rsi
|
||||
void *(*func)(void *), // rdx
|
||||
void *arg, // rcx
|
||||
intptr_t *stack, // r8
|
||||
unsigned xnuflags); // r9
|
||||
asm("XnuThreadThunk:\n\t"
|
||||
"xor\t%ebp,%ebp\n\t"
|
||||
"mov\t%r8,%rsp\n\t"
|
||||
|
@ -145,11 +157,18 @@ asm("XnuThreadThunk:\n\t"
|
|||
__attribute__((__used__, __no_reorder__))
|
||||
|
||||
static wontreturn void
|
||||
XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg,
|
||||
struct CloneArgs *wt, unsigned xnuflags) {
|
||||
XnuThreadMain(void *pthread, // rdi
|
||||
int tid, // rsi
|
||||
int (*func)(void *arg, int tid), // rdx
|
||||
void *arg, // rcx
|
||||
struct CloneArgs *wt, // r8
|
||||
unsigned xnuflags) { // r9
|
||||
int ax;
|
||||
wt->tid = tid;
|
||||
*wt->ptid = tid;
|
||||
*wt->ctid = tid;
|
||||
_spunlock(&wt->lock);
|
||||
|
||||
if (wt->tls) {
|
||||
// XNU uses the same 0x30 offset as the WIN32 TIB x64. They told the
|
||||
// Go team at Google that they Apply stands by our ability to use it
|
||||
|
@ -159,10 +178,9 @@ XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg,
|
|||
: "0"(__NR_thread_fast_set_cthread_self), "D"(wt->tls - 0x30)
|
||||
: "rcx", "r11", "memory", "cc");
|
||||
}
|
||||
if (wt->ctid) {
|
||||
*wt->ctid = tid;
|
||||
}
|
||||
func(arg);
|
||||
|
||||
func(arg, tid);
|
||||
|
||||
// we no longer use the stack after this point
|
||||
// %rax = int bsdthread_terminate(%rdi = void *stackaddr,
|
||||
// %rsi = size_t freesize,
|
||||
|
@ -179,7 +197,7 @@ XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg,
|
|||
}
|
||||
|
||||
static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
|
||||
void *arg, void *tls, size_t tlssz, int *ctid) {
|
||||
void *arg, void *tls, size_t tlssz, int *ptid, int *ctid) {
|
||||
int rc;
|
||||
bool failed;
|
||||
static bool once;
|
||||
|
@ -198,6 +216,7 @@ static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
|
|||
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
|
||||
sizeof(struct CloneArgs)) &
|
||||
-alignof(struct CloneArgs));
|
||||
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
|
||||
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
|
||||
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
|
||||
wt->tls = flags & CLONE_SETTLS ? tls : 0;
|
||||
|
@ -215,8 +234,9 @@ static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
|
|||
|
||||
static wontreturn void FreebsdThreadMain(void *p) {
|
||||
struct CloneArgs *wt = p;
|
||||
*wt->ptid = wt->tid;
|
||||
*wt->ctid = wt->tid;
|
||||
wt->func(wt->arg);
|
||||
wt->func(wt->arg, wt->tid);
|
||||
// we no longer use the stack after this point
|
||||
// void thr_exit(%rdi = long *state);
|
||||
asm volatile("movl\t$0,%0\n\t" // *wt->ztid = 0
|
||||
|
@ -227,8 +247,9 @@ static wontreturn void FreebsdThreadMain(void *p) {
|
|||
unreachable;
|
||||
}
|
||||
|
||||
static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags,
|
||||
void *arg, void *tls, size_t tlssz, int *ctid) {
|
||||
static int CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz,
|
||||
int flags, void *arg, void *tls, size_t tlssz,
|
||||
int *ptid, int *ctid) {
|
||||
int ax;
|
||||
bool failed;
|
||||
int64_t tid;
|
||||
|
@ -236,6 +257,7 @@ static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags,
|
|||
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
|
||||
sizeof(struct CloneArgs)) &
|
||||
-alignof(struct CloneArgs));
|
||||
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
|
||||
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
|
||||
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
|
||||
wt->tls = tls;
|
||||
|
@ -267,7 +289,9 @@ static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags,
|
|||
|
||||
static wontreturn void OpenbsdThreadMain(void *p) {
|
||||
struct CloneArgs *wt = p;
|
||||
wt->func(wt->arg);
|
||||
*wt->ptid = wt->tid;
|
||||
*wt->ctid = wt->tid;
|
||||
wt->func(wt->arg, wt->tid);
|
||||
// we no longer use the stack after this point. however openbsd
|
||||
// validates the rsp register too so a race condition can still
|
||||
// happen if the parent tries to free the stack. we'll solve it
|
||||
|
@ -279,13 +303,14 @@ static wontreturn void OpenbsdThreadMain(void *p) {
|
|||
"movl\t$0,(%%rdi)\n\t" // *wt->ztid = 0
|
||||
"syscall" // __threxit()
|
||||
: "=m"(*wt->ztid)
|
||||
: "a"(302), "m"(wt->pstack), "D"(wt->ztid)
|
||||
: "a"(302), "m"(wt->oldrsp), "D"(wt->ztid)
|
||||
: "rcx", "r11", "memory");
|
||||
unreachable;
|
||||
}
|
||||
|
||||
static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
|
||||
void *arg, void *tls, size_t tlssz, int *ctid) {
|
||||
static int CloneOpenbsd(int (*func)(void *, int), char *stk, size_t stksz,
|
||||
int flags, void *arg, void *tls, size_t tlssz,
|
||||
int *ptid, int *ctid) {
|
||||
int tid;
|
||||
intptr_t sp;
|
||||
struct __tfork *tf;
|
||||
|
@ -297,13 +322,15 @@ static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
|
|||
sp -= sizeof(struct CloneArgs);
|
||||
sp &= -MAX(16, alignof(struct CloneArgs));
|
||||
wt = (struct CloneArgs *)sp;
|
||||
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
|
||||
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
|
||||
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
|
||||
wt->pstack = __builtin_frame_address(0);
|
||||
wt->oldrsp = __builtin_frame_address(0);
|
||||
wt->arg = arg;
|
||||
wt->func = func;
|
||||
tf->tf_stack = (char *)wt - 8;
|
||||
tf->tf_tcb = flags & CLONE_SETTLS ? tls : 0;
|
||||
tf->tf_tid = flags & CLONE_CHILD_SETTID ? ctid : 0;
|
||||
tf->tf_tid = &wt->tid;
|
||||
if ((tid = __tfork_thread(tf, sizeof(*tf), OpenbsdThreadMain, wt)) < 0) {
|
||||
errno = -tid;
|
||||
tid = -1;
|
||||
|
@ -314,11 +341,17 @@ static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// NET BESIYATA DISHMAYA
|
||||
|
||||
static wontreturn void NetbsdThreadMain(void *arg, int (*func)(void *arg),
|
||||
int *tid, int *ctid, int *ztid) {
|
||||
static wontreturn void NetbsdThreadMain(void *arg, // rdi
|
||||
int (*func)(void *, int), // rsi
|
||||
int *tid, // rdx
|
||||
int *ctid, // rcx
|
||||
int *ztid, // r8
|
||||
int *ptid) { // r9
|
||||
int ax, dx;
|
||||
*ctid = *tid;
|
||||
func(arg);
|
||||
ax = *tid;
|
||||
*ptid = ax;
|
||||
*ctid = ax;
|
||||
func(arg, ax);
|
||||
// we no longer use the stack after this point
|
||||
// %eax = int __lwp_exit(void);
|
||||
asm volatile("movl\t$0,%2\n\t" // *wt->ztid = 0
|
||||
|
@ -330,8 +363,9 @@ static wontreturn void NetbsdThreadMain(void *arg, int (*func)(void *arg),
|
|||
unreachable;
|
||||
}
|
||||
|
||||
static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
|
||||
void *arg, void *tls, size_t tlssz, int *ctid) {
|
||||
static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
|
||||
int flags, void *arg, void *tls, size_t tlssz, int *ptid,
|
||||
int *ctid) {
|
||||
// NetBSD has its own clone() and it works, but it's technically a
|
||||
// second-class API, intended to help Linux folks migrate to this.
|
||||
bool failed;
|
||||
|
@ -341,7 +375,6 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
|
|||
static int broken;
|
||||
struct ucontext_netbsd *ctx;
|
||||
static struct ucontext_netbsd netbsd_clone_template;
|
||||
_Static_assert(sizeof(struct ucontext_netbsd) == 784, "fix assembly");
|
||||
|
||||
// memoize arbitrary valid processor state structure
|
||||
if (!once) {
|
||||
|
@ -360,7 +393,7 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
|
|||
}
|
||||
sp = (intptr_t)(stk + stksz);
|
||||
|
||||
// allocate memory for child tid
|
||||
// allocate memory for tid
|
||||
sp -= sizeof(int);
|
||||
sp = sp & -alignof(int);
|
||||
tid = (int *)sp;
|
||||
|
@ -388,6 +421,7 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
|
|||
ctx->uc_mcontext.rdx = (intptr_t)tid;
|
||||
ctx->uc_mcontext.rcx = (intptr_t)(flags & CLONE_CHILD_SETTID ? ctid : tid);
|
||||
ctx->uc_mcontext.r8 = (intptr_t)(flags & CLONE_CHILD_CLEARTID ? ctid : tid);
|
||||
ctx->uc_mcontext.r9 = (intptr_t)(flags & CLONE_PARENT_SETTID ? ptid : tid);
|
||||
ctx->uc_flags |= _UC_STACK;
|
||||
ctx->uc_stack.ss_sp = stk;
|
||||
ctx->uc_stack.ss_size = stksz;
|
||||
|
@ -413,8 +447,28 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
|
|||
////////////////////////////////////////////////////////////////////////////////
|
||||
// GNU/SYSTEMD
|
||||
|
||||
int sys_clone_linux(int flags, char *stk, int *ptid, int *ctid, void *tls,
|
||||
int (*func)(void *), void *arg);
|
||||
int sys_clone_linux(int flags, // rdi
|
||||
long sp, // rsi
|
||||
int *ptid, // rdx
|
||||
int *ctid, // rcx
|
||||
void *tls, // r8
|
||||
void *func, // r9
|
||||
void *arg); // 8(rsp)
|
||||
|
||||
static int CloneLinux(int (*func)(void *arg, int tid), char *stk, size_t stksz,
|
||||
int flags, void *arg, void *tls, size_t tlssz, int *ptid,
|
||||
int *ctid) {
|
||||
long sp;
|
||||
sp = (intptr_t)(stk + stksz);
|
||||
if (~flags & CLONE_CHILD_SETTID) {
|
||||
flags |= CLONE_CHILD_SETTID;
|
||||
sp -= sizeof(int);
|
||||
sp = sp & -alignof(int);
|
||||
ctid = (int *)sp;
|
||||
}
|
||||
sp = sp & -16; // align the stack
|
||||
return sys_clone_linux(flags, sp, ptid, ctid, tls, func, arg);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// COSMOPOLITAN
|
||||
|
@ -461,36 +515,69 @@ int sys_clone_linux(int flags, char *stk, int *ptid, int *ctid, void *tls,
|
|||
* other calls like getpid() may return incorrect values.
|
||||
*
|
||||
* @param func is your callback function, which this wrapper requires
|
||||
* not be null, otherwise EINVAL is raised
|
||||
* not be null, otherwise EINVAL is raised. It is passed two args
|
||||
* within the child thread: (1) the caller-supplied `arg` and (2)
|
||||
* the new tid is always passed in the second arg for convenience
|
||||
*
|
||||
* @param stk points to the bottom of a caller allocated stack, which
|
||||
* must be allocated via mmap() using the MAP_STACK flag, or else
|
||||
* you won't get optimal performance and it won't work on OpenBSD
|
||||
*
|
||||
* @param stksz is the size of that stack in bytes, we recommend that
|
||||
* that this be set to GetStackSize() or else memory safety tools
|
||||
* like kprintf() can't do as good and quick of a job; this value
|
||||
* must be 16-aligned plus it must be at least 4192 bytes in size
|
||||
* and it's advised to have the bottom-most page, be a guard page
|
||||
* @param flags should have:
|
||||
* - `CLONE_THREAD|CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND`
|
||||
* and you may optionally bitwise or any of the following:
|
||||
* - `CLONE_CHILD_SETTID` is needed too if you use `ctid` which
|
||||
* is part of the memory the child owns and it'll be set right
|
||||
* before the callback function is invoked
|
||||
* - `CLONE_CHILD_CLEARTID` causes `*ctid = 0` upon termination
|
||||
* which can be used to implement join so that the parent may
|
||||
* safely free the stack memory that the child is using
|
||||
* - `CLONE_PARENT_SETTID` is needed too if you use `ptid` and this
|
||||
* is guaranteed to happen before clone() returns
|
||||
* - `CLONE_SETTLS` is needed too if you set `tls`. You may get this
|
||||
* value from the thread by calling __get_tls(). There are a few
|
||||
* layout expectations imposed by your C library. Those are all
|
||||
* documented by __initialize_tls() which initializes the parts of
|
||||
* the first 64 bytes of tls memory that libc cares about. This
|
||||
* flag will transition the C runtime to the `__tls_enabled` state
|
||||
* automatically. If it's used for one thread, then it must be
|
||||
* used for all threads. The first time it's used, it must be used
|
||||
* from the main thread.
|
||||
* @param arg will be passed to your callback
|
||||
*
|
||||
* @param flags which SHOULD always have all of these flags:
|
||||
*
|
||||
* - `CLONE_THREAD`
|
||||
* - `CLONE_VM`
|
||||
* - `CLONE_FS`
|
||||
* - `CLONE_FILES`
|
||||
* - `CLONE_SIGHAND`
|
||||
*
|
||||
* This system call wrapper is intended for threads, and as such, we
|
||||
* won't polyfill Linux's ability to simulate unrelated calls (e.g.
|
||||
* fork, vfork) via clone() on other platforms. Please just call
|
||||
* fork() and vfork() when that's what you want.
|
||||
*
|
||||
* Your `flags` may also optionally also additionally bitwise-OR any
|
||||
* combination of the following additional flags:
|
||||
*
|
||||
* - `CLONE_PARENT_SETTID` must be specified if you intend to set
|
||||
* the `ptid` argument, which is guaranteed to be updated with the
|
||||
* child tid BEFORE BOTH clone() returns and `func` is invoked
|
||||
*
|
||||
* - `CLONE_CHILD_SETTID` must be specified if you intend to set the
|
||||
* `ctid` argument, which is guaranteed to be updated with the
|
||||
* child tid before `func` is called, however we CAN NOT guarantee
|
||||
* this will happen BEFORE clone() returns
|
||||
*
|
||||
* - `CLONE_CHILD_CLEARTID` causes `*ctid = 0` upon child thread
|
||||
* termination. This is used to implement join so that the parent
|
||||
* may know when it's safe to free the child's stack memory, and
|
||||
* as such, is guaranteed to happen AFTER the child thread has
|
||||
* either terminated or has finished using its stack memory
|
||||
*
|
||||
* - `CLONE_SETTLS` is needed if you intend to specify the `tls`
|
||||
* argument, which provides a fast-path solution for changing the
|
||||
* appropriate TLS segment register within the child thread. The
|
||||
* child thread may then obtain a reference to the TIB address you
|
||||
* supplied, by calling __get_tls(). Your C library holds certain
|
||||
* expectations about the layout of your Thread Information Block
|
||||
* (TIB), which are all documented by __initialize_tls(). That
|
||||
* function can be used to initialize the first positive 64 bytes
|
||||
* of your TLS allocation, which is the memory Cosmopolitan Libc
|
||||
* wants for itself (and negative addresses are reserved by the
|
||||
* GNU Linker). Using this flag will transition the C runtime to a
|
||||
* `__tls_enabled` state automatically. If you use TLS for just
|
||||
* one thread, then you must be specify TLS for ALL THREADS. It's
|
||||
* a good idea to do that since TLS can offer considerable (i.e.
|
||||
* multiple orders of a magnitude) performance improvement for
|
||||
* TID-dependent C library services, e.g. recursive mutexes.
|
||||
*
|
||||
* @param arg is passed as an argument to `func` in the child thread
|
||||
* @param tls may be used to set the thread local storage segment;
|
||||
* this parameter is ignored if `CLONE_SETTLS` is not set
|
||||
* @param tlssz is the size of tls in bytes which must be at least 64
|
||||
|
@ -499,8 +586,8 @@ int sys_clone_linux(int flags, char *stk, int *ptid, int *ctid, void *tls,
|
|||
* @return tid of child on success, or -1 w/ errno
|
||||
* @threadsafe
|
||||
*/
|
||||
int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg,
|
||||
int *ptid, void *tls, size_t tlssz, int *ctid) {
|
||||
int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid,
|
||||
void *tls, size_t tlssz, int *ctid) {
|
||||
int rc;
|
||||
struct CloneArgs *wt;
|
||||
|
||||
|
@ -529,8 +616,7 @@ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg,
|
|||
!__asan_is_valid(ctid, sizeof(*ctid))))) {
|
||||
rc = efault();
|
||||
} else if (IsLinux()) {
|
||||
rc =
|
||||
sys_clone_linux(flags, (char *)stk + stksz, ptid, ctid, tls, func, arg);
|
||||
rc = CloneLinux(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid);
|
||||
} else if (!IsTiny() &&
|
||||
(flags & ~(CLONE_SETTLS | CLONE_PARENT_SETTID |
|
||||
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) !=
|
||||
|
@ -539,19 +625,20 @@ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg,
|
|||
STRACE("clone flag unsupported on this platform");
|
||||
rc = einval();
|
||||
} else if (IsXnu()) {
|
||||
rc = CloneXnu(func, stk, stksz, flags, arg, tls, tlssz, ctid);
|
||||
rc = CloneXnu(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid);
|
||||
} else if (IsFreebsd()) {
|
||||
rc = CloneFreebsd(func, stk, stksz, flags, arg, tls, tlssz, ctid);
|
||||
rc = CloneFreebsd(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid);
|
||||
} else if (IsNetbsd()) {
|
||||
rc = CloneNetbsd(func, stk, stksz, flags, arg, tls, tlssz, ctid);
|
||||
rc = CloneNetbsd(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid);
|
||||
} else if (IsOpenbsd()) {
|
||||
rc = CloneOpenbsd(func, stk, stksz, flags, arg, tls, tlssz, ctid);
|
||||
rc = CloneOpenbsd(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid);
|
||||
} else if (IsWindows()) {
|
||||
rc = CloneWindows(func, stk, stksz, flags, arg, tls, tlssz, ctid);
|
||||
rc = CloneWindows(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid);
|
||||
} else {
|
||||
rc = enosys();
|
||||
}
|
||||
|
||||
// TODO(jart): do we need it?
|
||||
if (rc != -1 && (flags & CLONE_PARENT_SETTID)) {
|
||||
*ptid = rc;
|
||||
}
|
||||
|
|
|
@ -64,7 +64,7 @@ static struct SymbolTable *GetSymbolTableFromZip(struct Zipos *zipos) {
|
|||
lf = GetZipCfileOffset(zipos->map + cf);
|
||||
size = GetZipLfileUncompressedSize(zipos->map + lf);
|
||||
size2 = ROUNDUP(size, FRAMESIZE);
|
||||
if ((res = mapanon(size2))) {
|
||||
if ((res = _mapanon(size2))) {
|
||||
switch (ZIP_LFILE_COMPRESSIONMETHOD(zipos->map + lf)) {
|
||||
case kZipCompressionNone:
|
||||
memcpy(res, (void *)ZIP_LFILE_CONTENT(zipos->map + lf), size);
|
||||
|
|
|
@ -54,13 +54,16 @@
|
|||
* }
|
||||
*
|
||||
* That is performed automatically for unit test executables.
|
||||
*
|
||||
* @return memory map address on success, or null w/ errrno
|
||||
*/
|
||||
noasan void *mapanon(size_t size) {
|
||||
void *_mapanon(size_t size) {
|
||||
/* asan runtime depends on this function */
|
||||
void *m;
|
||||
m = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (m == MAP_FAILED && weaken(__oom_hook)) {
|
||||
weaken(__oom_hook)(size);
|
||||
return 0;
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
|
@ -16,57 +16,26 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/atomic.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/strace.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/futex.h"
|
||||
#include "libc/sysv/consts/nr.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "libc/runtime/stack.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
|
||||
/**
|
||||
* Waits for thread to terminate and frees its memory.
|
||||
* Allocates stack.
|
||||
*
|
||||
* @param td is thread descriptor memory
|
||||
* @param exitcode optionally receives value returned by thread
|
||||
* @return 0 on success, or error number on failure
|
||||
* @raises EDEADLK when trying to join this thread
|
||||
* @raises EINVAL if another thread is joining
|
||||
* @raises ESRCH if no such thread exists
|
||||
* @raises EINVAL if not joinable
|
||||
* @threadsafe
|
||||
* @return stack bottom address on success, or null w/ errrno
|
||||
*/
|
||||
int cthread_join(cthread_t td, void **exitcode) {
|
||||
int x, rc, tid;
|
||||
// otherwise, tid could be set to 0 even though `state` is not
|
||||
// finished mark thread as joining
|
||||
if (!td || (IsAsan() && !__asan_is_valid(td, sizeof(*td)))) {
|
||||
rc = ESRCH;
|
||||
tid = -1;
|
||||
} else if ((tid = td->tid) == gettid()) { // tid must load before lock xadd
|
||||
rc = EDEADLK;
|
||||
} else if (atomic_load(&td->state) & (cthread_detached | cthread_joining)) {
|
||||
rc = EINVAL;
|
||||
} else {
|
||||
if (~atomic_fetch_add(&td->state, cthread_joining) & cthread_finished) {
|
||||
while ((x = atomic_load(&td->tid))) {
|
||||
cthread_memory_wait32(&td->tid, x, 0);
|
||||
}
|
||||
}
|
||||
if (exitcode) {
|
||||
*exitcode = td->exitcode;
|
||||
}
|
||||
if (!munmap(td->alloc.bottom, td->alloc.top - td->alloc.bottom)) {
|
||||
rc = 0;
|
||||
} else {
|
||||
rc = errno;
|
||||
}
|
||||
}
|
||||
STRACE("cthread_join(%d, [%p]) → %s", tid, !rc && exitcode ? *exitcode : 0,
|
||||
!rc ? "0" : strerrno(rc));
|
||||
return rc;
|
||||
void *_mapstack(void) {
|
||||
return mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
|
||||
MAP_STACK | MAP_ANONYMOUS, -1, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Frees stack.
|
||||
*
|
||||
* @param stk was allocated by _mapstack()
|
||||
*/
|
||||
int _freestack(void *stk) {
|
||||
return munmap(stk, GetStackSize());
|
||||
}
|
|
@ -6,7 +6,7 @@ COSMOPOLITAN_C_START_
|
|||
│ cosmopolitan § runtime ─╬─│┼
|
||||
╚────────────────────────────────────────────────────────────────────────────│*/
|
||||
|
||||
typedef long jmp_buf[8] forcealign(CACHELINE);
|
||||
typedef long jmp_buf[8];
|
||||
|
||||
extern char **environ; /* CRT */
|
||||
extern int __argc; /* CRT */
|
||||
|
@ -45,8 +45,10 @@ extern size_t __virtualmax;
|
|||
extern bool __isworker;
|
||||
|
||||
void mcount(void);
|
||||
int _freestack(void *);
|
||||
unsigned long getauxval(unsigned long);
|
||||
void *mapanon(size_t) attributeallocsize((1));
|
||||
void *_mapanon(size_t) attributeallocsize((1)) mallocesque;
|
||||
void *_mapstack(void) returnsaligned((FRAMESIZE)) mallocesque;
|
||||
int setjmp(jmp_buf) libcesque returnstwice paramsnonnull();
|
||||
void longjmp(jmp_buf, int) libcesque wontreturn paramsnonnull();
|
||||
axdx_t setlongerjmp(jmp_buf) libcesque returnstwice paramsnonnull();
|
||||
|
|
|
@ -16,29 +16,206 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/syscall-sysv.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/threaded.h"
|
||||
#include "libc/nt/thread.h"
|
||||
#include "libc/nt/thunk/msabi.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/nrlinux.h"
|
||||
#include "libc/thread/thread.h"
|
||||
#include "third_party/xed/x86.h"
|
||||
|
||||
#define __NR_sysarch 0x000000a5 // freebsd+netbsd
|
||||
#define AMD64_SET_GSBASE 131 // freebsd
|
||||
#define AMD64_SET_FSBASE 129 // freebsd
|
||||
#define X86_SET_GSBASE 16 // netbsd
|
||||
#define X86_SET_FSBASE 17 // netbsd
|
||||
|
||||
#define __NR___set_tcb 0x00000149
|
||||
#define __NR__lwp_setprivate 0x0000013d
|
||||
#define __NR_thread_fast_set_cthread_self 0x03000003
|
||||
|
||||
#define _TLSZ ((intptr_t)_tls_size)
|
||||
#define _TLDZ ((intptr_t)_tdata_size)
|
||||
#define _TIBZ sizeof(struct cthread_descriptor_t)
|
||||
|
||||
static char tibdefault[64];
|
||||
extern int __threadcalls_end[];
|
||||
extern int __threadcalls_start[];
|
||||
extern unsigned char __get_tls_nt_rax[];
|
||||
__msabi extern typeof(TlsAlloc) *const __imp_TlsAlloc;
|
||||
|
||||
void __enable_tls(void) {
|
||||
__initialize_tls(tibdefault);
|
||||
*(int *)((char *)tibdefault + 0x38) = sys_gettid();
|
||||
*(int *)((char *)tibdefault + 0x3c) = __errno;
|
||||
__install_tls(tibdefault);
|
||||
privileged void __enable_tls(void) {
|
||||
assert(!__threaded);
|
||||
assert(!__tls_enabled);
|
||||
|
||||
// allocate tls memory for main process
|
||||
//
|
||||
// %fs Linux/BSDs
|
||||
// │
|
||||
// _Thread_local │ __get_tls()
|
||||
// ┌───┬──────────┬──────────┼───┐
|
||||
// │pad│ .tdata │ .tbss │tib│
|
||||
// └───┴──────────┴──────────┼───┘
|
||||
// │
|
||||
// Windows/Mac %gs
|
||||
//
|
||||
size_t siz;
|
||||
cthread_t tib;
|
||||
char *mem, *tls;
|
||||
siz = ROUNDUP(_TLSZ + _TIBZ, FRAMESIZE);
|
||||
mem = _mapanon(siz);
|
||||
tib = (cthread_t)(mem + siz - _TIBZ);
|
||||
tls = mem + siz - _TIBZ - _TLSZ;
|
||||
tib->self = tib;
|
||||
tib->self2 = tib;
|
||||
tib->err = __errno;
|
||||
tib->tid = sys_gettid();
|
||||
memmove(tls, _tdata_start, _TLDZ);
|
||||
|
||||
// ask the operating system to change the x86 segment register
|
||||
int ax, dx;
|
||||
if (IsWindows()) {
|
||||
__tls_index = __imp_TlsAlloc();
|
||||
asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib));
|
||||
} else if (IsFreebsd()) {
|
||||
asm volatile("syscall"
|
||||
: "=a"(ax)
|
||||
: "0"(__NR_sysarch), "D"(AMD64_SET_FSBASE), "S"(tib)
|
||||
: "rcx", "r11", "memory", "cc");
|
||||
} else if (IsNetbsd()) {
|
||||
asm volatile("syscall"
|
||||
: "=a"(ax), "=d"(dx)
|
||||
: "0"(__NR_sysarch), "D"(X86_SET_FSBASE), "S"(tib)
|
||||
: "rcx", "r11", "memory", "cc");
|
||||
} else if (IsXnu()) {
|
||||
asm volatile("syscall"
|
||||
: "=a"(ax)
|
||||
: "0"(__NR_thread_fast_set_cthread_self),
|
||||
"D"((intptr_t)tib - 0x30)
|
||||
: "rcx", "r11", "memory", "cc");
|
||||
} else if (IsOpenbsd()) {
|
||||
asm volatile("syscall"
|
||||
: "=a"(ax)
|
||||
: "0"(__NR___set_tcb), "D"(tib)
|
||||
: "rcx", "r11", "memory", "cc");
|
||||
} else {
|
||||
asm volatile("syscall"
|
||||
: "=a"(ax)
|
||||
: "0"(__NR_linux_arch_prctl), "D"(ARCH_SET_FS), "S"(tib)
|
||||
: "rcx", "r11", "memory");
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to rewrite SysV _Thread_local code. You MUST use the
|
||||
* -mno-tls-direct-seg-refs flag which generates code like this
|
||||
*
|
||||
* 64 48 8b 0R4 25 00 00 00 00 mov %fs:0,%R
|
||||
*
|
||||
* Which on Mac we can replace with this:
|
||||
*
|
||||
* 65 48 8b 0R4 25 30 00 00 00 mov %gs:0x30,%R
|
||||
*
|
||||
* Whereas on Windows we'll replace it with this:
|
||||
*
|
||||
* 0f 1f 40 00 fatnop4
|
||||
* e8 xx xx xx xx call __get_tls_nt_%R
|
||||
*
|
||||
* Since we have no idea where the TLS instructions exist in the
|
||||
* binary, we need to disassemble the whole program image. This'll
|
||||
* potentially take a few milliseconds for some larger programs.
|
||||
*
|
||||
* TODO(jart): compute probability this is just overkill
|
||||
*/
|
||||
if (IsWindows() || IsXnu()) {
|
||||
int n, reg, dis;
|
||||
unsigned char *p;
|
||||
struct XedDecodedInst xedd;
|
||||
__morph_begin();
|
||||
|
||||
// The most expensive part of this process is we need to compute the
|
||||
// byte length of each instruction in our program. We'll use Intel's
|
||||
// disassembler for this purpose.
|
||||
for (p = _ereal; p < __privileged_start; p += n) {
|
||||
xed_decoded_inst_zero_set_mode(&xedd, XED_MACHINE_MODE_LONG_64);
|
||||
if (!xed_instruction_length_decode(&xedd, p, 15)) {
|
||||
|
||||
// We now know p[0] is most likely the first byte of an x86 op.
|
||||
// Let's check and see if it's the GCC linear TIB address load.
|
||||
// We hope and pray GCC won't generate TLS stores to %r8..%r15.
|
||||
if (xedd.length == 9 && //
|
||||
0144 == p[0] && // fs
|
||||
0110 == p[1] && // rex.w (64-bit operand size)
|
||||
0213 == p[2] && // mov reg/mem → reg (word-sized)
|
||||
0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg
|
||||
0045 == p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32
|
||||
0000 == p[5] && // displacement (von Neumann endian)
|
||||
0000 == p[6] && // displacement
|
||||
0000 == p[7] && // displacement
|
||||
0000 == p[8]) { // displacement
|
||||
|
||||
// Apple is quite straightforward to patch. We basically
|
||||
// just change the segment register, and the linear slot
|
||||
if (IsXnu()) {
|
||||
p[0] = 0145; // this changes gs segment to fs segment
|
||||
p[5] = 0x30; // tib slot index for tib linear address
|
||||
}
|
||||
|
||||
// Windows is kind of complicated. We need to replace the
|
||||
// segment mov instruction with a function call, that (a)
|
||||
// won't clobber registers, and (b) has a return register
|
||||
// that's the same as the mov destination. When setting
|
||||
// function displacement, &CALL+5+DISP must equal &FUNC.
|
||||
else {
|
||||
reg = (p[3] & 070) >> 3;
|
||||
dis = (__get_tls_nt_rax + reg * 18) - (p + 9);
|
||||
p[0] = 0017; // map1
|
||||
p[1] = 0037; // nopl (onl if reg=0)
|
||||
p[2] = 0100; // mod/rm (%rax)+disp8
|
||||
p[3] = 0000; // displacement
|
||||
p[4] = 0350; // call
|
||||
p[5] = (dis & 0x000000ff) >> 000; // displacement
|
||||
p[6] = (dis & 0x0000ff00) >> 010; // displacement
|
||||
p[7] = (dis & 0x00ff0000) >> 020; // displacement
|
||||
p[8] = (dis & 0xff000000) >> 030; // displacement
|
||||
}
|
||||
}
|
||||
|
||||
// Move to the next instruction.
|
||||
n = xedd.length;
|
||||
} else {
|
||||
// If Xed failed to decode the instruction, then we'll just plow
|
||||
// through memory one byte at a time until Xed's morale improves
|
||||
n = 1;
|
||||
}
|
||||
}
|
||||
|
||||
__morph_end();
|
||||
}
|
||||
|
||||
// we are now allowed to use tls
|
||||
__tls_enabled = true;
|
||||
}
|
||||
|
||||
privileged void __enable_threads(void) {
|
||||
assert(!__threaded);
|
||||
__threaded = gettid();
|
||||
__morph_begin();
|
||||
/*
|
||||
* _NOPL("__threadcalls", func)
|
||||
*
|
||||
* The big ugly macro above is used by Cosmopolitan Libc to unser
|
||||
* locking primitive (e.g. flockfile, funlockfile) have zero impact on
|
||||
* performance and binary size when threads aren't actually in play.
|
||||
*
|
||||
* we have this
|
||||
*
|
||||
* 0f 1f 05 b1 19 00 00 nopl func(%rip)
|
||||
|
@ -46,8 +223,10 @@ privileged void __enable_threads(void) {
|
|||
* we're going to turn it into this
|
||||
*
|
||||
* 67 67 e8 b1 19 00 00 addr32 addr32 call func
|
||||
*
|
||||
* This is cheap and fast because the big ugly macro stored in the
|
||||
* binary the offsets of all the instructions we need to change.
|
||||
*/
|
||||
__morph_begin();
|
||||
for (int *p = __threadcalls_start; p < __threadcalls_end; ++p) {
|
||||
_base[*p + 0] = 0x67;
|
||||
_base[*p + 1] = 0x67;
|
||||
|
|
|
@ -26,8 +26,9 @@
|
|||
// runtime facilities.
|
||||
//
|
||||
// @param %rdi is arg
|
||||
// @param %rsi is func
|
||||
// @param %rdx is stack
|
||||
// @param %rsi is tid
|
||||
// @param %rdx is func
|
||||
// @param %rcx is stack
|
||||
// @return %rax is exit code
|
||||
// @see clone()
|
||||
WinThreadLaunch:
|
||||
|
@ -35,9 +36,9 @@ WinThreadLaunch:
|
|||
push %r15
|
||||
mov %rbp,%r15
|
||||
mov %rsp,%rbx
|
||||
mov %rdx,%rsp
|
||||
mov %rcx,%rsp
|
||||
xor %rbp,%rbp
|
||||
call *%rsi
|
||||
call *%rdx
|
||||
mov %r15,%rbp
|
||||
mov %rbx,%rsp
|
||||
pop %r15
|
||||
|
|
|
@ -32,7 +32,7 @@
|
|||
* they are passed in the ≤64kb bytes preceding src.
|
||||
*
|
||||
* @return pointer to end of decoded data, similar to mempcpy()
|
||||
* @see mapanon(), lz4check()
|
||||
* @see _mapanon(), lz4check()
|
||||
*/
|
||||
void *lz4decode(void *dest, const void *src) {
|
||||
const unsigned char *frame, *block;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/nexgen32e/gettls.h"
|
||||
#include "libc/nexgen32e/threaded.h"
|
||||
|
||||
/**
|
||||
* Returns address of thread information block.
|
||||
|
|
|
@ -40,6 +40,7 @@ LIBC_SYSV_A_FILES := \
|
|||
libc/sysv/errno_location.greg.c \
|
||||
libc/sysv/errno.c \
|
||||
libc/sysv/gettls.greg.c \
|
||||
libc/sysv/tlspolyfill.S \
|
||||
libc/sysv/errfun.S \
|
||||
libc/sysv/strace.greg.c \
|
||||
libc/sysv/describeos.greg.c \
|
||||
|
|
90
libc/sysv/tlspolyfill.S
Normal file
90
libc/sysv/tlspolyfill.S
Normal file
|
@ -0,0 +1,90 @@
|
|||
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.internal.h"
|
||||
|
||||
// Code morphing TLS polyfills for The New Technology.
|
||||
//
|
||||
// @note msvc generates this code so it's stable
|
||||
// @note func ordering follows x86 reg encoding
|
||||
// @note each function is exactly 18 bytes
|
||||
// @see __enable_threads()
|
||||
|
||||
__get_tls_nt_rax:
|
||||
push %rcx
|
||||
mov __tls_index(%rip),%ecx
|
||||
mov %gs:0x1480(,%rcx,8),%rax
|
||||
pop %rcx
|
||||
ret
|
||||
.endfn __get_tls_nt_rax,globl,hidden
|
||||
|
||||
__get_tls_nt_rcx:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rcx
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __get_tls_nt_rcx
|
||||
|
||||
__get_tls_nt_rdx:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rdx
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __get_tls_nt_rdx
|
||||
|
||||
__get_tls_nt_rbx:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rbx
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __get_tls_nt_rbx
|
||||
|
||||
__get_tls_nt_rsp:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rsp
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __get_tls_nt_rsp
|
||||
|
||||
__get_tls_nt_rbp:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rbp
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __get_tls_nt_rbp
|
||||
|
||||
__get_tls_nt_rsi:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rsi
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __get_tls_nt_rsi
|
||||
|
||||
__get_tls_nt_rdi:
|
||||
push %rax
|
||||
mov __tls_index(%rip),%eax
|
||||
mov %gs:0x1480(,%rax,8),%rdi
|
||||
pop %rax
|
||||
ret
|
||||
.endfn __get_tls_nt_rdi
|
|
@ -1,133 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/bits/atomic.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/strace.internal.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/setjmp.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/threaded.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/clone.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
#include "libc/thread/internal.h"
|
||||
#include "libc/thread/thread.h"
|
||||
|
||||
STATIC_YOINK("_main_thread_ctor");
|
||||
|
||||
static cthread_t cthread_allocate(const cthread_attr_t *attr) {
|
||||
char *mem;
|
||||
size_t size;
|
||||
cthread_t td;
|
||||
size = ROUNDUP(
|
||||
attr->stacksize +
|
||||
ROUNDUP((uintptr_t)_tls_size + sizeof(struct cthread_descriptor_t),
|
||||
PAGESIZE),
|
||||
FRAMESIZE);
|
||||
mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_STACK | MAP_ANONYMOUS, -1, 0);
|
||||
if (mem == MAP_FAILED) return 0;
|
||||
if (attr->guardsize > PAGESIZE) {
|
||||
mprotect(mem, attr->guardsize, PROT_NONE);
|
||||
}
|
||||
td = (cthread_t)(mem + size - sizeof(struct cthread_descriptor_t));
|
||||
td->self = td;
|
||||
td->self2 = td;
|
||||
td->err = errno;
|
||||
td->tid = -1;
|
||||
td->stack.bottom = mem;
|
||||
td->stack.top = mem + attr->stacksize;
|
||||
td->alloc.bottom = mem;
|
||||
td->alloc.top = mem + size;
|
||||
if (attr->mode & CTHREAD_CREATE_DETACHED) {
|
||||
td->state = cthread_detached;
|
||||
} else {
|
||||
td->state = cthread_started;
|
||||
}
|
||||
// Initialize TLS with content of .tdata section
|
||||
memmove((void *)((intptr_t)td - (intptr_t)_tls_size), _tdata_start,
|
||||
(intptr_t)_tdata_size);
|
||||
return td;
|
||||
}
|
||||
|
||||
static int cthread_start(void *arg) {
|
||||
axdx_t rc;
|
||||
void *exitcode;
|
||||
cthread_t td = arg;
|
||||
if (!(rc = setlongerjmp(td->exiter)).ax) {
|
||||
exitcode = td->func(td->arg);
|
||||
} else {
|
||||
exitcode = (void *)rc.dx;
|
||||
}
|
||||
td->exitcode = exitcode;
|
||||
_pthread_key_destruct(td->key);
|
||||
if (atomic_load(&td->state) & cthread_detached) {
|
||||
// we're still using the stack
|
||||
// thus we can't munmap it yet
|
||||
// kick the can down the road!
|
||||
cthread_zombies_add(td);
|
||||
}
|
||||
atomic_fetch_add(&td->state, cthread_finished);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates thread.
|
||||
*
|
||||
* @param ptd will receive pointer to new thread descriptor
|
||||
* @param attr contains special configuration if non-null
|
||||
* @param func is thread callback function
|
||||
* @param arg is argument supplied to `func`
|
||||
* @return 0 on success, or error number on failure
|
||||
* @threadsafe
|
||||
*/
|
||||
int cthread_create(cthread_t *ptd, const cthread_attr_t *attr,
|
||||
void *(*func)(void *), void *arg) {
|
||||
int rc, tid;
|
||||
cthread_t td;
|
||||
cthread_attr_t default_attr;
|
||||
__threaded = true;
|
||||
cthread_zombies_reap();
|
||||
cthread_attr_init(&default_attr);
|
||||
if ((td = cthread_allocate(attr ? attr : &default_attr))) {
|
||||
td->func = func;
|
||||
td->arg = arg;
|
||||
cthread_attr_destroy(&default_attr);
|
||||
tid =
|
||||
clone(cthread_start, td->stack.bottom, td->stack.top - td->stack.bottom,
|
||||
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
|
||||
CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
|
||||
td, 0, td, sizeof(struct cthread_descriptor_t), &td->tid);
|
||||
if (tid != -1) {
|
||||
*ptd = td;
|
||||
rc = 0;
|
||||
} else {
|
||||
rc = errno;
|
||||
munmap(td->alloc.bottom, td->alloc.top - td->alloc.bottom);
|
||||
}
|
||||
} else {
|
||||
rc = errno;
|
||||
tid = -1;
|
||||
}
|
||||
STRACE("cthread_create([%d], %p, %p, %p) → %s", tid, attr, func, arg,
|
||||
!rc ? "0" : strerrno(rc));
|
||||
return rc;
|
||||
}
|
|
@ -21,7 +21,7 @@
|
|||
.init.start 400,_main_thread_ctor
|
||||
push %rdi
|
||||
push %rsi
|
||||
call _main_thread_init
|
||||
call __enable_tls
|
||||
pop %rsi
|
||||
pop %rdi
|
||||
.init.end 400,_main_thread_ctor
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
|
@ -16,53 +16,41 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/threaded.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/runtime/stack.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
#include "libc/thread/spawn.h"
|
||||
#include "libc/thread/thread.h"
|
||||
|
||||
textstartup void _main_thread_init(void) {
|
||||
_Static_assert(offsetof(struct cthread_descriptor_t, self) == 0x00, "");
|
||||
_Static_assert(offsetof(struct cthread_descriptor_t, self2) == 0x30, "");
|
||||
_Static_assert(offsetof(struct cthread_descriptor_t, tid) == 0x38, "");
|
||||
_Static_assert(offsetof(struct cthread_descriptor_t, err) == 0x3c, "");
|
||||
cthread_t td;
|
||||
size_t totalsize;
|
||||
char *mem, *bottom, *top;
|
||||
#define _TLSZ ((intptr_t)_tls_size)
|
||||
#define _TLDZ ((intptr_t)_tdata_size)
|
||||
#define _TIBZ sizeof(struct cthread_descriptor_t)
|
||||
#define _MEMZ ROUNDUP(_TLSZ + _TIBZ, alignof(struct cthread_descriptor_t))
|
||||
|
||||
totalsize = ROUNDUP(
|
||||
(uintptr_t)_tls_size + sizeof(struct cthread_descriptor_t), FRAMESIZE);
|
||||
/**
|
||||
* Allocates thread-local storage memory for new thread.
|
||||
* @return buffer that must be released with free()
|
||||
*/
|
||||
char *_mktls(char **out_tib) {
|
||||
char *tls;
|
||||
cthread_t tib;
|
||||
|
||||
mem = mmap(0, totalsize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE,
|
||||
-1, 0);
|
||||
assert(mem != MAP_FAILED);
|
||||
// Allocate enough TLS memory for all the GNU Linuker (_tls_size)
|
||||
// organized _Thread_local data, as well as Cosmpolitan Libc (64)
|
||||
if (!(tls = calloc(1, _MEMZ))) return 0;
|
||||
|
||||
bottom = mem;
|
||||
top = mem + totalsize;
|
||||
// set up thread informaiton block
|
||||
tib = (cthread_t)(tls + _MEMZ - _TIBZ);
|
||||
tib->self = tib;
|
||||
tib->self2 = tib;
|
||||
tib->err = 0;
|
||||
tib->tid = -1;
|
||||
memmove(tls, _tdata_start, _TLDZ);
|
||||
|
||||
td = (cthread_t)(top - sizeof(struct cthread_descriptor_t));
|
||||
td->self = td;
|
||||
td->self2 = td;
|
||||
td->err = errno;
|
||||
td->tid = gettid();
|
||||
td->alloc.bottom = bottom;
|
||||
td->alloc.top = top;
|
||||
td->stack.bottom = GetStackAddr(0);
|
||||
td->stack.top = td->stack.bottom + GetStackSize();
|
||||
td->state = cthread_main;
|
||||
|
||||
// Initialize TLS with content of .tdata section
|
||||
memmove((void *)((uintptr_t)td - (uintptr_t)_tls_size), _tdata_start,
|
||||
(uintptr_t)_tdata_size);
|
||||
|
||||
// Set FS
|
||||
__install_tls((char *)td);
|
||||
if (out_tib) {
|
||||
*out_tib = (char *)tib;
|
||||
}
|
||||
return tls;
|
||||
}
|
113
libc/thread/spawn.c
Normal file
113
libc/thread/spawn.c
Normal file
|
@ -0,0 +1,113 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "libc/intrin/wait0.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/nexgen32e/threaded.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/runtime/stack.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/clone.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
#include "libc/sysv/consts/prot.h"
|
||||
#include "libc/thread/spawn.h"
|
||||
#include "libc/thread/thread.h"
|
||||
|
||||
STATIC_YOINK("_main_thread_ctor");
|
||||
|
||||
/**
|
||||
* @fileoverview Simple System Threads API
|
||||
*/
|
||||
|
||||
#define _TLSZ ((intptr_t)_tls_size)
|
||||
#define _TLDZ ((intptr_t)_tdata_size)
|
||||
#define _TIBZ sizeof(struct cthread_descriptor_t)
|
||||
#define _MEMZ ROUNDUP(_TLSZ + _TIBZ, alignof(struct cthread_descriptor_t))
|
||||
|
||||
/**
|
||||
* Spawns thread.
|
||||
*
|
||||
* @param fun is thread worker callback, which receives `arg` and `ctid`
|
||||
* @param arg shall be passed to `fun`
|
||||
* @param opt_out_thread needn't be initialiized and is always clobbered
|
||||
* except when it isn't specified, in which case, the thread is kind
|
||||
* of detached and will leak in stack / tls memory
|
||||
* @return 0 on success, or -1 w/ errno
|
||||
*/
|
||||
int _spawn(int fun(void *, int), void *arg, struct spawn *opt_out_thread) {
|
||||
struct spawn *th, ths;
|
||||
|
||||
// we need to to clobber the output memory before calling clone, since
|
||||
// there's no guarantee clone() won't suspend the parent, and focus on
|
||||
// running the child instead; in that case child might want to read it
|
||||
if (opt_out_thread) {
|
||||
th = opt_out_thread;
|
||||
} else {
|
||||
th = &ths;
|
||||
}
|
||||
|
||||
// Allocate enough TLS memory for all the GNU Linuker (_tls_size)
|
||||
// organized _Thread_local data, as well as Cosmpolitan Libc (64)
|
||||
if (!(th->tls = _mktls(&th->tib))) {
|
||||
return -1;
|
||||
}
|
||||
th->ctid = (int *)(th->tib + 0x38);
|
||||
|
||||
// We must use _mapstack() to allocate the stack because OpenBSD has
|
||||
// very strict requirements for what's allowed to be used for stacks
|
||||
if (!(th->stk = _mapstack())) {
|
||||
free(th->tls);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (clone(fun, th->stk, GetStackSize(),
|
||||
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
|
||||
CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
|
||||
CLONE_CHILD_CLEARTID,
|
||||
arg, &th->ptid, th->tib, _TIBZ, th->ctid) == -1) {
|
||||
_freestack(th->stk);
|
||||
free(th->tls);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Waits for thread created by _spawn() to terminate.
|
||||
*
|
||||
* This will free your thread's stack and tls memory too.
|
||||
*/
|
||||
int _join(struct spawn *th) {
|
||||
int rc;
|
||||
if (th->ctid) {
|
||||
// wait for ctid to become zero
|
||||
_wait0(th->ctid);
|
||||
// free thread memory
|
||||
free(th->tls);
|
||||
rc = munmap(th->stk, GetStackSize());
|
||||
} else {
|
||||
rc = 0;
|
||||
}
|
||||
bzero(th, sizeof(*th));
|
||||
return rc;
|
||||
}
|
20
libc/thread/spawn.h
Normal file
20
libc/thread/spawn.h
Normal file
|
@ -0,0 +1,20 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_THREAD_SPAWN_H_
|
||||
#define COSMOPOLITAN_LIBC_THREAD_SPAWN_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
struct spawn {
|
||||
int ptid;
|
||||
int *ctid;
|
||||
char *stk;
|
||||
char *tls;
|
||||
char *tib;
|
||||
};
|
||||
|
||||
int _spawn(int (*)(void *, int), void *, struct spawn *) hidden;
|
||||
int _join(struct spawn *) hidden;
|
||||
char *_mktls(char **) hidden;
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_THREAD_SPAWN_H_ */
|
|
@ -15,7 +15,6 @@ enum cthread_state {
|
|||
cthread_joining = 1,
|
||||
cthread_finished = 2,
|
||||
cthread_detached = 4,
|
||||
cthread_main = 127,
|
||||
};
|
||||
|
||||
struct cthread_descriptor_t {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue