Make _Thread_local work across platforms

We now rewrite the binary image at runtime on Windows and XNU, changing
`mov %fs:0,%reg` instructions to use %gs instead. This change also
introduces a simpler threading API, _spawn() and _join(), which has
replaced most clone() usage.
This commit is contained in:
Justine Tunney 2022-07-10 04:01:17 -07:00
parent e4d6e263d4
commit 5f4f6b0e69
51 changed files with 808 additions and 1043 deletions

View file

@ -153,7 +153,8 @@ DEFAULT_COPTS = \
-fno-gnu-unique \ -fno-gnu-unique \
-fstrict-aliasing \ -fstrict-aliasing \
-fstrict-overflow \ -fstrict-overflow \
-fno-semantic-interposition -fno-semantic-interposition \
-mno-tls-direct-seg-refs
MATHEMATICAL = \ MATHEMATICAL = \
-O3 \ -O3 \

View file

@ -43,6 +43,7 @@
#include "libc/sysv/consts/sock.h" #include "libc/sysv/consts/sock.h"
#include "libc/sysv/consts/sol.h" #include "libc/sysv/consts/sol.h"
#include "libc/sysv/consts/tcp.h" #include "libc/sysv/consts/tcp.h"
#include "libc/thread/spawn.h"
#include "libc/time/struct/tm.h" #include "libc/time/struct/tm.h"
#include "libc/time/time.h" #include "libc/time/time.h"
#include "net/http/http.h" #include "net/http/http.h"
@ -106,7 +107,7 @@ _Atomic(int) connections;
_Atomic(int) closingtime; _Atomic(int) closingtime;
const char *volatile status; const char *volatile status;
int Worker(void *id) { int Worker(void *id, int tid) {
int server, yes = 1; int server, yes = 1;
// load balance incoming connections for port 8080 across all threads // load balance incoming connections for port 8080 across all threads
@ -273,8 +274,7 @@ void PrintStatus(void) {
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
int i; int i;
char **tls; struct spawn *th;
char **stack;
uint32_t *hostips; uint32_t *hostips;
// ShowCrashReports(); // ShowCrashReports();
@ -293,37 +293,24 @@ int main(int argc, char *argv[]) {
PORT); PORT);
} }
// spawn over 9,000 worker threads
tls = 0;
stack = 0;
threads = argc > 1 ? atoi(argv[1]) : GetCpuCount(); threads = argc > 1 ? atoi(argv[1]) : GetCpuCount();
if ((1 <= threads && threads <= INT_MAX) && if ((1 <= threads && threads <= 100000)) {
(tls = malloc(threads * sizeof(*tls))) && kprintf("error: invalid number of threads\n");
(stack = malloc(threads * sizeof(*stack)))) { exit(1);
for (i = 0; i < threads; ++i) {
if ((tls[i] = __initialize_tls(malloc(64))) &&
(stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)) != MAP_FAILED) {
++workers;
if (clone(Worker, stack[i], GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND | CLONE_SETTLS | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID,
(void *)(intptr_t)i, 0, tls[i], 64,
(int *)(tls[i] + 0x38)) == -1) {
--workers;
kprintf("error: clone(%d) failed %m\n", i);
} }
} else {
kprintf("error: mmap(%d) failed %m\n", i); // spawn over 9,000 worker threads
th = calloc(threads, sizeof(*th));
for (i = 0; i < threads; ++i) {
++workers;
if (_spawn(Worker, (void *)(intptr_t)i, th + i) == -1) {
--workers;
kprintf("error: _spawn(%d) failed %m\n", i);
} }
if (!(i % 500)) { if (!(i % 500)) {
PrintStatus(); PrintStatus();
} }
} }
} else {
kprintf("error: invalid number of threads\n");
}
// wait for workers to terminate // wait for workers to terminate
while (workers) { while (workers) {
@ -335,17 +322,11 @@ int main(int argc, char *argv[]) {
kprintf("\r\e[K"); kprintf("\r\e[K");
// join the workers // join the workers
// this is how we guarantee stacks are safe to free
if (tls && stack) {
for (i = 0; i < threads; ++i) { for (i = 0; i < threads; ++i) {
_wait0((int *)(tls[i] + 0x38)); _join(th + i);
munmap(stack[i], GetStackSize());
free(tls[i]);
}
} }
// clean up memory // clean up memory
free(hostips); free(hostips);
free(stack); free(th);
free(tls);
} }

View file

@ -1,70 +0,0 @@
#if 0
/*─────────────────────────────────────────────────────────────────╗
To the extent possible under law, Justine Tunney has waived
all copyright and related or neighboring rights to this file,
as it is written in the following disclaimers:
http://unlicense.org/ │
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/intrin/kprintf.h"
#include "libc/log/log.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/thread/thread.h"
#include "libc/time/time.h"
// Semaphore shared by main() and worker(): main() initializes it and
// waits on it, worker() signals it as its first action.
cthread_sem_t semaphore;
// Thread-local variable with a non-zero initializer. Both main() and
// worker() check that they read back 0x12345678 from their own copy.
_Thread_local int test_tls = 0x12345678;
/**
 * Thread entry point: signals the shared semaphore, prints this
 * thread's handle, tid and `test_tls` value, and verifies that
 * `test_tls` still holds its initializer.
 *
 * @param arg is unused
 * @return the constant 4 cast to a pointer
 */
static void *worker(void *arg) {
  // main() waits on this semaphore right after creating the thread
  cthread_sem_signal(&semaphore);

  cthread_t me = cthread_self();
  int mytid = me->tid;
  printf("[%p] %d -> %#x\n", me, mytid, test_tls);

  // this thread must observe the initializer in its own copy
  if (test_tls != 0x12345678) {
    printf(".tdata test #2 failed\n");
  }

  return (void *)4;
}
/**
 * Demonstrates `_Thread_local` on the main thread and one worker thread.
 *
 * Refuses to run on Windows and XNU. Otherwise it prints the main
 * thread's view of `test_tls`, starts worker(), waits for it to signal
 * the semaphore, and then joins it.
 *
 * @return 1 when the platform is unsupported, otherwise 0 (including
 *     when a check fails or the thread could not be started)
 */
int main() {
  if (IsWindows() || IsXnu()) {
    fprintf(stderr,
            "error: can't run example\n"
            "_Thread_local only works on Linux/FreeBSD/NetBSD/OpenBSD\n");
    return 1;
  }

  // report what the main thread sees
  cthread_t me = cthread_self();
  int mytid = me->tid;
  printf("[%p] %d -> %#x\n", me, mytid, test_tls);
  if (test_tls != 0x12345678) {
    printf(".tdata test #1 failed\n");
  }

  cthread_sem_init(&semaphore, 0);
  cthread_t child;
  int err = cthread_create(&child, NULL, &worker, NULL);
  if (err != 0) {
    fprintf(stderr, "ERROR: thread could not be started: %d\n", err);
    return 0;
  }

  // worker() signals the semaphore as its first action
  cthread_sem_wait(&semaphore, 0, NULL);
  printf("thread created: %p\n", child);
  void *exitcode;
#if 1
  cthread_join(child, &exitcode);
#else
  exitcode = cthread_detach(child);
#endif
  // NOTE(review): a signal immediately followed by a wait on the same
  // semaphore; the purpose is not evident from this file -- confirm
  cthread_sem_signal(&semaphore);
  cthread_sem_wait(&semaphore, 0, NULL);
  printf("thread joined: %p -> %p\n", child, exitcode);
  return 0;
}

View file

@ -67,6 +67,7 @@ int chdir(const char *);
int chmod(const char *, uint32_t); int chmod(const char *, uint32_t);
int chown(const char *, uint32_t, uint32_t); int chown(const char *, uint32_t, uint32_t);
int chroot(const char *); int chroot(const char *);
int clone(void *, void *, size_t, int, void *, int *, void *, size_t, int *);
int close(int); int close(int);
int creat(const char *, uint32_t); int creat(const char *, uint32_t);
int dup(int); int dup(int);
@ -196,9 +197,6 @@ ssize_t splice(int, int64_t *, int, int64_t *, size_t, uint32_t);
ssize_t write(int, const void *, size_t); ssize_t write(int, const void *, size_t);
void sync(void); void sync(void);
int clone(int (*)(void *), void *, size_t, int, void *, int *, void *, size_t,
int *);
COSMOPOLITAN_C_END_ COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_CALLS_SYSCALLS_H_ */ #endif /* COSMOPOLITAN_LIBC_CALLS_SYSCALLS_H_ */

View file

@ -18,6 +18,7 @@
*/ */
#include "libc/calls/strace.internal.h" #include "libc/calls/strace.internal.h"
#include "libc/calls/struct/timespec.h" #include "libc/calls/struct/timespec.h"
#include "libc/errno.h"
#include "libc/fmt/itoa.h" #include "libc/fmt/itoa.h"
#include "libc/intrin/describeflags.internal.h" #include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/futex.internal.h" #include "libc/intrin/futex.internal.h"

View file

@ -108,7 +108,6 @@ o/$(MODE)/libc/intrin/describeprotflags.o: \
OVERRIDE_CFLAGS += \ OVERRIDE_CFLAGS += \
-fno-sanitize=address -fno-sanitize=address
o/$(MODE)/libc/intrin/tls.greg.o \
o/$(MODE)/libc/intrin/exit.greg.o \ o/$(MODE)/libc/intrin/exit.greg.o \
o/$(MODE)/libc/intrin/exit1.greg.o \ o/$(MODE)/libc/intrin/exit1.greg.o \
o/$(MODE)/libc/intrin/getenv.greg.o \ o/$(MODE)/libc/intrin/getenv.greg.o \

View file

@ -1,125 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/nt/thread.h"
#include "libc/nt/thunk/msabi.h"
#include "libc/sysv/consts/nrlinux.h"
#define __NR_sysarch 0x000000a5 // freebsd+netbsd
#define AMD64_SET_GSBASE 131 // freebsd
#define AMD64_SET_FSBASE 129 // freebsd
#define X86_SET_GSBASE 16 // netbsd
#define X86_SET_FSBASE 17 // netbsd
#define __NR___set_tcb 0x00000149
#define __NR__lwp_setprivate 0x0000013d
#define __NR_thread_fast_set_cthread_self 0x03000003
/**
 * Initializes thread information block.
 *
 * The fields written here, relative to `tib`, are:
 *
 *     offset size description
 *     0x0000 0x08 linear address pointer (set to tib itself)
 *     0x0030 0x08 linear address pointer (set to tib itself)
 *     0x0038 0x04 tid (set to -1)
 *     0x003c 0x04 errno (set to 0)
 *
 * @param tib is the 64-byte block to fill in; a null pointer is
 *     accepted and left untouched
 * @return tib, unchanged
 */
privileged void *__initialize_tls(char tib[64]) {
  intptr_t self;
  if (!tib) {
    return tib;
  }
  self = (intptr_t)tib;
  *(intptr_t *)(tib + 0x00) = self;  // self-pointer at the base
  *(intptr_t *)(tib + 0x30) = self;  // second self-pointer
  *(int *)(tib + 0x38) = -1;         // tid
  *(int *)(tib + 0x3c) = 0;          // errno
  return tib;
}
/**
 * Installs thread information block on main process.
 *
 * For example, to set up TLS correctly for the main thread, without
 * creating any threads, then it's sufficient to say:
 *
 *     __attribute__((__constructor__)) static void InitTls(void) {
 *       static char tls[64];
 *       __initialize_tls(tls);
 *       *(int *)(tls + 0x38) = gettid();
 *       *(int *)(tls + 0x3c) = __errno;
 *       __install_tls(tls);
 *     }
 *
 * We use a constructor here to make sure it only happens once. Please
 * note that calling `clone` will do this automatically.
 *
 * Installing TLS causes the `__tls_enabled` variable to be set. This
 * causes C library features such as `errno` and `gettid()` to use TLS.
 * This can help things like recursive mutexes go significantly faster.
 *
 * To access your TLS storage, you can call `__get_tls()` or
 * `__get_tls_inline()` which return the address of the `tib`.
 *
 * Each platform branch below issues exactly one operation to publish
 * `tib`; the value left in the result register is never inspected, so
 * a failure of that operation is not reported to the caller.
 *
 * @param tib is your thread information block, which must have at least
 *     64 bytes on the righthand side of the tib pointer since those are
 *     the values your C library reserves for itself. memory on the left
 *     side of the pointer is reserved by the linker for _Thread_local.
 */
privileged void __install_tls(char tib[64]) {
// ax/dx only exist to receive the registers the syscalls write back
int ax, dx;
// preconditions: a block was supplied, TLS has not been installed yet,
// and the tid field at 0x38 no longer holds the -1 placeholder
assert(tib);
assert(!__tls_enabled);
assert(*(int *)(tib + 0x38) != -1);
if (IsWindows()) {
// remember the index handed out by TlsAlloc(), then store tib through
// %gs into the long-sized slot number __tls_index counting from 0x1480
__tls_index = TlsAlloc();
asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib));
} else if (IsFreebsd()) {
// sysarch(AMD64_SET_FSBASE, tib)
asm volatile("syscall"
: "=a"(ax)
: "0"(__NR_sysarch), "D"(AMD64_SET_FSBASE), "S"(tib)
: "rcx", "r11", "memory", "cc");
} else if (IsNetbsd()) {
// sysarch(X86_SET_FSBASE, tib); rdx is also declared as an output here
asm volatile("syscall"
: "=a"(ax), "=d"(dx)
: "0"(__NR_sysarch), "D"(X86_SET_FSBASE), "S"(tib)
: "rcx", "r11", "memory", "cc");
} else if (IsXnu()) {
// thread_fast_set_cthread_self(tib - 0x30)
// NOTE(review): presumably the 0x30 bias compensates for an offset the
// kernel applies to this value -- confirm
asm volatile("syscall"
: "=a"(ax)
: "0"(__NR_thread_fast_set_cthread_self),
"D"((intptr_t)tib - 0x30)
: "rcx", "r11", "memory", "cc");
} else if (IsOpenbsd()) {
// __set_tcb(tib)
asm volatile("syscall"
: "=a"(ax)
: "0"(__NR___set_tcb), "D"(tib)
: "rcx", "r11", "memory", "cc");
} else {
// every remaining platform takes the Linux path:
// arch_prctl(ARCH_SET_FS, tib)
asm volatile("syscall"
: "=a"(ax)
: "0"(__NR_linux_arch_prctl), "D"(ARCH_SET_FS), "S"(tib)
: "rcx", "r11", "memory");
}
// set only after the platform-specific install has been issued
__tls_enabled = true;
}

View file

@ -16,10 +16,10 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/assert.h"
#include "libc/bits/atomic.h" #include "libc/bits/atomic.h"
#include "libc/calls/calls.h" #include "libc/calls/calls.h"
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/futex.internal.h" #include "libc/intrin/futex.internal.h"
#include "libc/intrin/wait0.internal.h" #include "libc/intrin/wait0.internal.h"
#include "libc/linux/futex.h" #include "libc/linux/futex.h"
@ -31,13 +31,13 @@
* by the clone() system call when a thread terminates. The purpose of * by the clone() system call when a thread terminates. The purpose of
* this operation is to know when it's safe to munmap() a thread stack. * this operation is to know when it's safe to munmap() a thread stack.
*/ */
void _wait0(const int *ptid) { void _wait0(const int *ctid) {
int x; int x;
for (;;) { for (;;) {
if (!(x = atomic_load_explicit(ptid, memory_order_acquire))) { if (!(x = atomic_load_explicit(ctid, memory_order_acquire))) {
break; break;
} else if (IsLinux() /* || IsOpenbsd() */) { } else if (IsLinux() /* || IsOpenbsd() */) {
_futex_wait(ptid, x, &(struct timespec){2}); _futex_wait(ctid, x, &(struct timespec){2});
} else { } else {
sched_yield(); sched_yield();
} }

View file

@ -245,9 +245,12 @@ static wontreturn relegated noinstrument void __minicrash(int sig,
"RIP %x\n" "RIP %x\n"
"RSP %x\n" "RSP %x\n"
"RBP %x\n" "RBP %x\n"
"PID %d\n"
"TID %d\n"
"\n", "\n",
kind, sig, __argv[0], ctx ? ctx->uc_mcontext.rip : 0, kind, sig, __argv[0], ctx ? ctx->uc_mcontext.rip : 0,
ctx ? ctx->uc_mcontext.rsp : 0, ctx ? ctx->uc_mcontext.rbp : 0); ctx ? ctx->uc_mcontext.rsp : 0, ctx ? ctx->uc_mcontext.rbp : 0, __pid,
sys_gettid());
__restorewintty(); __restorewintty();
_Exit(119); _Exit(119);
} }

View file

@ -26,7 +26,7 @@
// @param rdx is ptid // @param rdx is ptid
// @param rcx is ctid // @param rcx is ctid
// @param r8 is tls // @param r8 is tls
// @param r9 is func // @param r9 is func(void*,int)→int
// @param 8(rsp) is arg // @param 8(rsp) is arg
// @return tid of child on success, or -1 w/ errno // @return tid of child on success, or -1 w/ errno
sys_clone_linux: sys_clone_linux:
@ -48,8 +48,9 @@ sys_clone_linux:
jmp 0b jmp 0b
2: xor %ebp,%ebp # child thread 2: xor %ebp,%ebp # child thread
mov %rbx,%rdi # arg mov %rbx,%rdi # arg
call *%r9 # func(arg) mov (%r10),%esi # tid
xchg %eax,%edi # func(arg) exitcode call *%r9 # func(arg,tid)
xchg %eax,%edi # func(arg,tid) exitcode
mov $60,%eax # __NR_exit(exitcode) mov $60,%eax # __NR_exit(exitcode)
syscall syscall
.endfn sys_clone_linux,globl,hidden .endfn sys_clone_linux,globl,hidden

View file

@ -16,7 +16,6 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/assert.h"
#include "libc/calls/calls.h" #include "libc/calls/calls.h"
#include "libc/calls/strace.internal.h" #include "libc/calls/strace.internal.h"
#include "libc/calls/struct/ucontext-netbsd.internal.h" #include "libc/calls/struct/ucontext-netbsd.internal.h"
@ -24,7 +23,6 @@
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/intrin/asan.internal.h" #include "libc/intrin/asan.internal.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/spinlock.h" #include "libc/intrin/spinlock.h"
#include "libc/limits.h" #include "libc/limits.h"
#include "libc/macros.internal.h" #include "libc/macros.internal.h"
@ -68,19 +66,23 @@ struct CloneArgs {
}; };
union { union {
char lock; char lock;
void *pstack; void *oldrsp;
}; };
int *ptid;
int *ctid; int *ctid;
int *ztid; int *ztid;
char *tls; char *tls;
int (*func)(void *); int (*func)(void *, int);
void *arg; void *arg;
}; };
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// THE NEW TECHNOLOGY // THE NEW TECHNOLOGY
int WinThreadLaunch(void *arg, int (*func)(void *), intptr_t rsp); int WinThreadLaunch(void *arg, // rdi
int tid, // rsi
int (*func)(void *, int), // rdx
intptr_t rsp); // rcx
// we can't log this function because: // we can't log this function because:
// 1. windows owns the backtrace pointer right now // 1. windows owns the backtrace pointer right now
@ -90,16 +92,20 @@ int WinThreadLaunch(void *arg, int (*func)(void *), intptr_t rsp);
// 2. windows owns the stack memory right now // 2. windows owns the stack memory right now
// we need win32 raw imports because: // we need win32 raw imports because:
// 1. generated thunks are function logged // 1. generated thunks are function logged
noasan noinstrument static textwindows wontreturn void WinThreadEntry( noasan noinstrument static textwindows wontreturn void //
int rdi, int rsi, int rdx, struct CloneArgs *wt) { WinThreadEntry(int rdi, // rcx
int rsi, // rdx
int rdx, // r8
struct CloneArgs *wt) { // r9
int rc; int rc;
if (wt->tls) { if (wt->tls) {
asm("mov\t%1,%%gs:%0" asm("mov\t%1,%%gs:%0"
: "=m"(*((long *)0x1480 + __tls_index)) : "=m"(*((long *)0x1480 + __tls_index))
: "r"(wt->tls)); : "r"(wt->tls));
} }
*wt->ptid = wt->tid;
*wt->ctid = wt->tid; *wt->ctid = wt->tid;
rc = WinThreadLaunch(wt->arg, wt->func, (intptr_t)wt & -16); rc = WinThreadLaunch(wt->arg, wt->tid, wt->func, (intptr_t)wt & -16);
// we can now clear ctid directly since we're no longer using our own // we can now clear ctid directly since we're no longer using our own
// stack memory, which can now be safely free'd by the parent thread. // stack memory, which can now be safely free'd by the parent thread.
*wt->ztid = 0; *wt->ztid = 0;
@ -109,14 +115,16 @@ noasan noinstrument static textwindows wontreturn void WinThreadEntry(
unreachable; unreachable;
} }
static textwindows int CloneWindows(int (*func)(void *), char *stk, static textwindows int CloneWindows(int (*func)(void *, int), char *stk,
size_t stksz, int flags, void *arg, size_t stksz, int flags, void *arg,
void *tls, size_t tlssz, int *ctid) { void *tls, size_t tlssz, int *ptid,
int *ctid) {
int64_t h; int64_t h;
struct CloneArgs *wt; struct CloneArgs *wt;
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) - wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
sizeof(struct CloneArgs)) & sizeof(struct CloneArgs)) &
-alignof(struct CloneArgs)); -alignof(struct CloneArgs));
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->func = func; wt->func = func;
@ -133,8 +141,12 @@ static textwindows int CloneWindows(int (*func)(void *), char *stk,
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// XNU'S NOT UNIX // XNU'S NOT UNIX
void XnuThreadThunk(void *pthread, int machport, void *(*func)(void *), void XnuThreadThunk(void *pthread, // rdi
void *arg, intptr_t *stack, unsigned xnuflags); int machport, // rsi
void *(*func)(void *), // rdx
void *arg, // rcx
intptr_t *stack, // r8
unsigned xnuflags); // r9
asm("XnuThreadThunk:\n\t" asm("XnuThreadThunk:\n\t"
"xor\t%ebp,%ebp\n\t" "xor\t%ebp,%ebp\n\t"
"mov\t%r8,%rsp\n\t" "mov\t%r8,%rsp\n\t"
@ -145,11 +157,18 @@ asm("XnuThreadThunk:\n\t"
__attribute__((__used__, __no_reorder__)) __attribute__((__used__, __no_reorder__))
static wontreturn void static wontreturn void
XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg, XnuThreadMain(void *pthread, // rdi
struct CloneArgs *wt, unsigned xnuflags) { int tid, // rsi
int (*func)(void *arg, int tid), // rdx
void *arg, // rcx
struct CloneArgs *wt, // r8
unsigned xnuflags) { // r9
int ax; int ax;
wt->tid = tid; wt->tid = tid;
*wt->ptid = tid;
*wt->ctid = tid;
_spunlock(&wt->lock); _spunlock(&wt->lock);
if (wt->tls) { if (wt->tls) {
// XNU uses the same 0x30 offset as the WIN32 TIB x64. They told the // XNU uses the same 0x30 offset as the WIN32 TIB x64. They told the
// Go team at Google that Apple stands by our ability to use it // Go team at Google that Apple stands by our ability to use it
@ -159,10 +178,9 @@ XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg,
: "0"(__NR_thread_fast_set_cthread_self), "D"(wt->tls - 0x30) : "0"(__NR_thread_fast_set_cthread_self), "D"(wt->tls - 0x30)
: "rcx", "r11", "memory", "cc"); : "rcx", "r11", "memory", "cc");
} }
if (wt->ctid) {
*wt->ctid = tid; func(arg, tid);
}
func(arg);
// we no longer use the stack after this point // we no longer use the stack after this point
// %rax = int bsdthread_terminate(%rdi = void *stackaddr, // %rax = int bsdthread_terminate(%rdi = void *stackaddr,
// %rsi = size_t freesize, // %rsi = size_t freesize,
@ -179,7 +197,7 @@ XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg,
} }
static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags, static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
void *arg, void *tls, size_t tlssz, int *ctid) { void *arg, void *tls, size_t tlssz, int *ptid, int *ctid) {
int rc; int rc;
bool failed; bool failed;
static bool once; static bool once;
@ -198,6 +216,7 @@ static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) - wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
sizeof(struct CloneArgs)) & sizeof(struct CloneArgs)) &
-alignof(struct CloneArgs)); -alignof(struct CloneArgs));
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->tls = flags & CLONE_SETTLS ? tls : 0; wt->tls = flags & CLONE_SETTLS ? tls : 0;
@ -215,8 +234,9 @@ static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
static wontreturn void FreebsdThreadMain(void *p) { static wontreturn void FreebsdThreadMain(void *p) {
struct CloneArgs *wt = p; struct CloneArgs *wt = p;
*wt->ptid = wt->tid;
*wt->ctid = wt->tid; *wt->ctid = wt->tid;
wt->func(wt->arg); wt->func(wt->arg, wt->tid);
// we no longer use the stack after this point // we no longer use the stack after this point
// void thr_exit(%rdi = long *state); // void thr_exit(%rdi = long *state);
asm volatile("movl\t$0,%0\n\t" // *wt->ztid = 0 asm volatile("movl\t$0,%0\n\t" // *wt->ztid = 0
@ -227,8 +247,9 @@ static wontreturn void FreebsdThreadMain(void *p) {
unreachable; unreachable;
} }
static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags, static int CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz,
void *arg, void *tls, size_t tlssz, int *ctid) { int flags, void *arg, void *tls, size_t tlssz,
int *ptid, int *ctid) {
int ax; int ax;
bool failed; bool failed;
int64_t tid; int64_t tid;
@ -236,6 +257,7 @@ static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags,
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) - wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
sizeof(struct CloneArgs)) & sizeof(struct CloneArgs)) &
-alignof(struct CloneArgs)); -alignof(struct CloneArgs));
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->tls = tls; wt->tls = tls;
@ -267,7 +289,9 @@ static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags,
static wontreturn void OpenbsdThreadMain(void *p) { static wontreturn void OpenbsdThreadMain(void *p) {
struct CloneArgs *wt = p; struct CloneArgs *wt = p;
wt->func(wt->arg); *wt->ptid = wt->tid;
*wt->ctid = wt->tid;
wt->func(wt->arg, wt->tid);
// we no longer use the stack after this point. however openbsd // we no longer use the stack after this point. however openbsd
// validates the rsp register too so a race condition can still // validates the rsp register too so a race condition can still
// happen if the parent tries to free the stack. we'll solve it // happen if the parent tries to free the stack. we'll solve it
@ -279,13 +303,14 @@ static wontreturn void OpenbsdThreadMain(void *p) {
"movl\t$0,(%%rdi)\n\t" // *wt->ztid = 0 "movl\t$0,(%%rdi)\n\t" // *wt->ztid = 0
"syscall" // __threxit() "syscall" // __threxit()
: "=m"(*wt->ztid) : "=m"(*wt->ztid)
: "a"(302), "m"(wt->pstack), "D"(wt->ztid) : "a"(302), "m"(wt->oldrsp), "D"(wt->ztid)
: "rcx", "r11", "memory"); : "rcx", "r11", "memory");
unreachable; unreachable;
} }
static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags, static int CloneOpenbsd(int (*func)(void *, int), char *stk, size_t stksz,
void *arg, void *tls, size_t tlssz, int *ctid) { int flags, void *arg, void *tls, size_t tlssz,
int *ptid, int *ctid) {
int tid; int tid;
intptr_t sp; intptr_t sp;
struct __tfork *tf; struct __tfork *tf;
@ -297,13 +322,15 @@ static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
sp -= sizeof(struct CloneArgs); sp -= sizeof(struct CloneArgs);
sp &= -MAX(16, alignof(struct CloneArgs)); sp &= -MAX(16, alignof(struct CloneArgs));
wt = (struct CloneArgs *)sp; wt = (struct CloneArgs *)sp;
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->pstack = __builtin_frame_address(0); wt->oldrsp = __builtin_frame_address(0);
wt->arg = arg; wt->arg = arg;
wt->func = func; wt->func = func;
tf->tf_stack = (char *)wt - 8; tf->tf_stack = (char *)wt - 8;
tf->tf_tcb = flags & CLONE_SETTLS ? tls : 0; tf->tf_tcb = flags & CLONE_SETTLS ? tls : 0;
tf->tf_tid = flags & CLONE_CHILD_SETTID ? ctid : 0; tf->tf_tid = &wt->tid;
if ((tid = __tfork_thread(tf, sizeof(*tf), OpenbsdThreadMain, wt)) < 0) { if ((tid = __tfork_thread(tf, sizeof(*tf), OpenbsdThreadMain, wt)) < 0) {
errno = -tid; errno = -tid;
tid = -1; tid = -1;
@ -314,11 +341,17 @@ static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// NET BESIYATA DISHMAYA // NET BESIYATA DISHMAYA
static wontreturn void NetbsdThreadMain(void *arg, int (*func)(void *arg), static wontreturn void NetbsdThreadMain(void *arg, // rdi
int *tid, int *ctid, int *ztid) { int (*func)(void *, int), // rsi
int *tid, // rdx
int *ctid, // rcx
int *ztid, // r8
int *ptid) { // r9
int ax, dx; int ax, dx;
*ctid = *tid; ax = *tid;
func(arg); *ptid = ax;
*ctid = ax;
func(arg, ax);
// we no longer use the stack after this point // we no longer use the stack after this point
// %eax = int __lwp_exit(void); // %eax = int __lwp_exit(void);
asm volatile("movl\t$0,%2\n\t" // *wt->ztid = 0 asm volatile("movl\t$0,%2\n\t" // *wt->ztid = 0
@ -330,8 +363,9 @@ static wontreturn void NetbsdThreadMain(void *arg, int (*func)(void *arg),
unreachable; unreachable;
} }
static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags, static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
void *arg, void *tls, size_t tlssz, int *ctid) { int flags, void *arg, void *tls, size_t tlssz, int *ptid,
int *ctid) {
// NetBSD has its own clone() and it works, but it's technically a // NetBSD has its own clone() and it works, but it's technically a
// second-class API, intended to help Linux folks migrate to this. // second-class API, intended to help Linux folks migrate to this.
bool failed; bool failed;
@ -341,7 +375,6 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
static int broken; static int broken;
struct ucontext_netbsd *ctx; struct ucontext_netbsd *ctx;
static struct ucontext_netbsd netbsd_clone_template; static struct ucontext_netbsd netbsd_clone_template;
_Static_assert(sizeof(struct ucontext_netbsd) == 784, "fix assembly");
// memoize arbitrary valid processor state structure // memoize arbitrary valid processor state structure
if (!once) { if (!once) {
@ -360,7 +393,7 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
} }
sp = (intptr_t)(stk + stksz); sp = (intptr_t)(stk + stksz);
// allocate memory for child tid // allocate memory for tid
sp -= sizeof(int); sp -= sizeof(int);
sp = sp & -alignof(int); sp = sp & -alignof(int);
tid = (int *)sp; tid = (int *)sp;
@ -388,6 +421,7 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
ctx->uc_mcontext.rdx = (intptr_t)tid; ctx->uc_mcontext.rdx = (intptr_t)tid;
ctx->uc_mcontext.rcx = (intptr_t)(flags & CLONE_CHILD_SETTID ? ctid : tid); ctx->uc_mcontext.rcx = (intptr_t)(flags & CLONE_CHILD_SETTID ? ctid : tid);
ctx->uc_mcontext.r8 = (intptr_t)(flags & CLONE_CHILD_CLEARTID ? ctid : tid); ctx->uc_mcontext.r8 = (intptr_t)(flags & CLONE_CHILD_CLEARTID ? ctid : tid);
ctx->uc_mcontext.r9 = (intptr_t)(flags & CLONE_PARENT_SETTID ? ptid : tid);
ctx->uc_flags |= _UC_STACK; ctx->uc_flags |= _UC_STACK;
ctx->uc_stack.ss_sp = stk; ctx->uc_stack.ss_sp = stk;
ctx->uc_stack.ss_size = stksz; ctx->uc_stack.ss_size = stksz;
@ -413,8 +447,28 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// GNU/SYSTEMD // GNU/SYSTEMD
int sys_clone_linux(int flags, char *stk, int *ptid, int *ctid, void *tls, int sys_clone_linux(int flags, // rdi
int (*func)(void *), void *arg); long sp, // rsi
int *ptid, // rdx
int *ctid, // rcx
void *tls, // r8
void *func, // r9
void *arg); // 8(rsp)
/**
 * Linux backend for clone(): prepares the child stack pointer and the
 * flags, then defers to the sys_clone_linux() assembly routine.
 *
 * If the caller did not request CLONE_CHILD_SETTID, the flag is forced
 * on and `ctid` is redirected to an int carved from the top of the
 * child stack, so a child-tid location always exists.
 * NOTE(review): presumably the assembly reads the tid back from that
 * location to pass it as func's second argument -- confirm there.
 *
 * @param func is the thread callback, forwarded untouched
 * @param stk is the lowest address of the child stack
 * @param stksz is the size of that stack in bytes
 * @param flags are CLONE_* flags; CLONE_CHILD_SETTID may be added
 * @param arg is forwarded to sys_clone_linux()
 * @param tls is forwarded to sys_clone_linux()
 * @param tlssz is unused here
 * @param ptid is forwarded to sys_clone_linux()
 * @param ctid is forwarded, unless replaced as described above
 * @return whatever sys_clone_linux() returns
 */
static int CloneLinux(int (*func)(void *arg, int tid), char *stk, size_t stksz,
                      int flags, void *arg, void *tls, size_t tlssz, int *ptid,
                      int *ctid) {
  // start from the highest address of the caller-supplied stack
  long top = (intptr_t)(stk + stksz);
  if ((flags & CLONE_CHILD_SETTID) != CLONE_CHILD_SETTID) {
    // reserve an aligned int at the top of the stack for the child tid
    top -= sizeof(int);
    top &= -alignof(int);
    ctid = (int *)top;
    flags |= CLONE_CHILD_SETTID;
  }
  top &= -16;  // align the stack
  return sys_clone_linux(flags, top, ptid, ctid, tls, func, arg);
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// COSMOPOLITAN // COSMOPOLITAN
@ -461,36 +515,69 @@ int sys_clone_linux(int flags, char *stk, int *ptid, int *ctid, void *tls,
* other calls like getpid() may return incorrect values. * other calls like getpid() may return incorrect values.
* *
* @param func is your callback function, which this wrapper requires * @param func is your callback function, which this wrapper requires
* not be null, otherwise EINVAL is raised * not be null, otherwise EINVAL is raised. It is passed two args
* within the child thread: (1) the caller-supplied `arg` and (2)
* the new tid is always passed in the second arg for convenience
*
* @param stk points to the bottom of a caller allocated stack, which * @param stk points to the bottom of a caller allocated stack, which
* must be allocated via mmap() using the MAP_STACK flag, or else * must be allocated via mmap() using the MAP_STACK flag, or else
* you won't get optimal performance and it won't work on OpenBSD * you won't get optimal performance and it won't work on OpenBSD
*
* @param stksz is the size of that stack in bytes, we recommend that * @param stksz is the size of that stack in bytes, we recommend that
* that this be set to GetStackSize() or else memory safety tools * that this be set to GetStackSize() or else memory safety tools
* like kprintf() can't do as good and quick of a job; this value * like kprintf() can't do as good and quick of a job; this value
* must be 16-aligned plus it must be at least 4192 bytes in size * must be 16-aligned plus it must be at least 4192 bytes in size
* and it's advised to have the bottom-most page, be a guard page * and it's advised to have the bottom-most page, be a guard page
* @param flags should have: *
* - `CLONE_THREAD|CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND` * @param flags which SHOULD always have all of these flags:
* and you may optionally bitwise or any of the following: *
* - `CLONE_CHILD_SETTID` is needed too if you use `ctid` which * - `CLONE_THREAD`
* is part of the memory the child owns and it'll be set right * - `CLONE_VM`
* before the callback function is invoked * - `CLONE_FS`
* - `CLONE_CHILD_CLEARTID` causes `*ctid = 0` upon termination * - `CLONE_FILES`
* which can be used to implement join so that the parent may * - `CLONE_SIGHAND`
* safely free the stack memory that the child is using *
* - `CLONE_PARENT_SETTID` is needed too if you use `ptid` and this * This system call wrapper is intended for threads, and as such, we
* is guaranteed to happen before clone() returns * won't polyfill Linux's ability to simulate unrelated calls (e.g.
* - `CLONE_SETTLS` is needed too if you set `tls`. You may get this * fork, vfork) via clone() on other platforms. Please just call
* value from the thread by calling __get_tls(). There are a few * fork() and vfork() when that's what you want.
* layout expectations imposed by your C library. Those are all *
* documented by __initialize_tls() which initializes the parts of * Your `flags` may also optionally also additionally bitwise-OR any
* the first 64 bytes of tls memory that libc cares about. This * combination of the following additional flags:
* flag will transition the C runtime to the `__tls_enabled` state *
* automatically. If it's used for one thread, then it must be * - `CLONE_PARENT_SETTID` must be specified if you intend to set
* used for all threads. The first time it's used, it must be used * the `ptid` argument, which is guaranteed to be updated with the
* from the main thread. * child tid BEFORE BOTH clone() returns and `func` is invoked
* @param arg will be passed to your callback *
* - `CLONE_CHILD_SETTID` must be specified if you intend to set the
* `ctid` argument, which is guaranteed to be updated with the
* child tid before `func` is called, however we CAN NOT guarantee
* this will happen BEFORE clone() returns
*
* - `CLONE_CHILD_CLEARTID` causes `*ctid = 0` upon child thread
* termination. This is used to implement join so that the parent
* may know when it's safe to free the child's stack memory, and
* as such, is guaranteed to happen AFTER the child thread has
* either terminated or has finished using its stack memory
*
* - `CLONE_SETTLS` is needed if you intend to specify the `tls`
* argument, which provides a fast-path solution for changing the
* appropriate TLS segment register within the child thread. The
* child thread may then obtain a reference to the TIB address you
* supplied, by calling __get_tls(). Your C library holds certain
* expectations about the layout of your Thread Information Block
* (TIB), which are all documented by __initialize_tls(). That
* function can be used to initialize the first positive 64 bytes
* of your TLS allocation, which is the memory Cosmopolitan Libc
* wants for itself (and negative addresses are reserved by the
* GNU Linker). Using this flag will transition the C runtime to a
* `__tls_enabled` state automatically. If you use TLS for just
* one thread, then you must specify TLS for ALL THREADS. It's
* a good idea to do that since TLS can offer considerable (i.e.
* multiple orders of magnitude) performance improvement for
* TID-dependent C library services, e.g. recursive mutexes.
*
* @param arg is passed as an argument to `func` in the child thread
* @param tls may be used to set the thread local storage segment; * @param tls may be used to set the thread local storage segment;
* this parameter is ignored if `CLONE_SETTLS` is not set * this parameter is ignored if `CLONE_SETTLS` is not set
* @param tlssz is the size of tls in bytes which must be at least 64 * @param tlssz is the size of tls in bytes which must be at least 64
@ -499,8 +586,8 @@ int sys_clone_linux(int flags, char *stk, int *ptid, int *ctid, void *tls,
* @return tid of child on success, or -1 w/ errno * @return tid of child on success, or -1 w/ errno
* @threadsafe * @threadsafe
*/ */
int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg, int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid,
int *ptid, void *tls, size_t tlssz, int *ctid) { void *tls, size_t tlssz, int *ctid) {
int rc; int rc;
struct CloneArgs *wt; struct CloneArgs *wt;
@ -529,8 +616,7 @@ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg,
!__asan_is_valid(ctid, sizeof(*ctid))))) { !__asan_is_valid(ctid, sizeof(*ctid))))) {
rc = efault(); rc = efault();
} else if (IsLinux()) { } else if (IsLinux()) {
rc = rc = CloneLinux(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid);
sys_clone_linux(flags, (char *)stk + stksz, ptid, ctid, tls, func, arg);
} else if (!IsTiny() && } else if (!IsTiny() &&
(flags & ~(CLONE_SETTLS | CLONE_PARENT_SETTID | (flags & ~(CLONE_SETTLS | CLONE_PARENT_SETTID |
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) != CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) !=
@ -539,19 +625,20 @@ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg,
STRACE("clone flag unsupported on this platform"); STRACE("clone flag unsupported on this platform");
rc = einval(); rc = einval();
} else if (IsXnu()) { } else if (IsXnu()) {
rc = CloneXnu(func, stk, stksz, flags, arg, tls, tlssz, ctid); rc = CloneXnu(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid);
} else if (IsFreebsd()) { } else if (IsFreebsd()) {
rc = CloneFreebsd(func, stk, stksz, flags, arg, tls, tlssz, ctid); rc = CloneFreebsd(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid);
} else if (IsNetbsd()) { } else if (IsNetbsd()) {
rc = CloneNetbsd(func, stk, stksz, flags, arg, tls, tlssz, ctid); rc = CloneNetbsd(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid);
} else if (IsOpenbsd()) { } else if (IsOpenbsd()) {
rc = CloneOpenbsd(func, stk, stksz, flags, arg, tls, tlssz, ctid); rc = CloneOpenbsd(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid);
} else if (IsWindows()) { } else if (IsWindows()) {
rc = CloneWindows(func, stk, stksz, flags, arg, tls, tlssz, ctid); rc = CloneWindows(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid);
} else { } else {
rc = enosys(); rc = enosys();
} }
// TODO(jart): do we need it?
if (rc != -1 && (flags & CLONE_PARENT_SETTID)) { if (rc != -1 && (flags & CLONE_PARENT_SETTID)) {
*ptid = rc; *ptid = rc;
} }

View file

@ -64,7 +64,7 @@ static struct SymbolTable *GetSymbolTableFromZip(struct Zipos *zipos) {
lf = GetZipCfileOffset(zipos->map + cf); lf = GetZipCfileOffset(zipos->map + cf);
size = GetZipLfileUncompressedSize(zipos->map + lf); size = GetZipLfileUncompressedSize(zipos->map + lf);
size2 = ROUNDUP(size, FRAMESIZE); size2 = ROUNDUP(size, FRAMESIZE);
if ((res = mapanon(size2))) { if ((res = _mapanon(size2))) {
switch (ZIP_LFILE_COMPRESSIONMETHOD(zipos->map + lf)) { switch (ZIP_LFILE_COMPRESSIONMETHOD(zipos->map + lf)) {
case kZipCompressionNone: case kZipCompressionNone:
memcpy(res, (void *)ZIP_LFILE_CONTENT(zipos->map + lf), size); memcpy(res, (void *)ZIP_LFILE_CONTENT(zipos->map + lf), size);

View file

@ -54,13 +54,16 @@
* } * }
* *
* That is performed automatically for unit test executables. * That is performed automatically for unit test executables.
*
* @return memory map address on success, or null w/ errno
*/ */
noasan void *mapanon(size_t size) { void *_mapanon(size_t size) {
/* asan runtime depends on this function */ /* asan runtime depends on this function */
void *m; void *m;
m = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); m = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (m == MAP_FAILED && weaken(__oom_hook)) { if (m == MAP_FAILED && weaken(__oom_hook)) {
weaken(__oom_hook)(size); weaken(__oom_hook)(size);
return 0;
} }
return m; return m;
} }

View file

@ -16,25 +16,26 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/calls/calls.h"
#include "libc/errno.h"
#include "libc/nexgen32e/gettls.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/testlib/testlib.h" #include "libc/runtime/stack.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
static char tib[64]; /**
* Allocates stack.
TEST(tls, test) { *
errno = 31337; * @return stack bottom address on success, or null w/ errno
EXPECT_EQ(31337, errno); */
EXPECT_EQ(&__errno, __errno_location()); void *_mapstack(void) {
__initialize_tls(tib); return mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
*(int *)((char *)tib + 0x38) = gettid(); MAP_STACK | MAP_ANONYMOUS, -1, 0);
*(int *)((char *)tib + 0x3c) = __errno; }
__install_tls(tib);
EXPECT_EQ(31337, errno); /**
EXPECT_EQ(tib, __get_tls()); * Frees stack.
EXPECT_EQ(tib, __get_tls_inline()); *
EXPECT_EQ(tib + 0x3c, (char *)__errno_location()); * @param stk was allocated by _mapstack()
*/
int _freestack(void *stk) {
return munmap(stk, GetStackSize());
} }

View file

@ -6,7 +6,7 @@ COSMOPOLITAN_C_START_
cosmopolitan § runtime cosmopolitan § runtime
*/ */
typedef long jmp_buf[8] forcealign(CACHELINE); typedef long jmp_buf[8];
extern char **environ; /* CRT */ extern char **environ; /* CRT */
extern int __argc; /* CRT */ extern int __argc; /* CRT */
@ -45,8 +45,10 @@ extern size_t __virtualmax;
extern bool __isworker; extern bool __isworker;
void mcount(void); void mcount(void);
int _freestack(void *);
unsigned long getauxval(unsigned long); unsigned long getauxval(unsigned long);
void *mapanon(size_t) attributeallocsize((1)); void *_mapanon(size_t) attributeallocsize((1)) mallocesque;
void *_mapstack(void) returnsaligned((FRAMESIZE)) mallocesque;
int setjmp(jmp_buf) libcesque returnstwice paramsnonnull(); int setjmp(jmp_buf) libcesque returnstwice paramsnonnull();
void longjmp(jmp_buf, int) libcesque wontreturn paramsnonnull(); void longjmp(jmp_buf, int) libcesque wontreturn paramsnonnull();
axdx_t setlongerjmp(jmp_buf) libcesque returnstwice paramsnonnull(); axdx_t setlongerjmp(jmp_buf) libcesque returnstwice paramsnonnull();

View file

@ -16,29 +16,206 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/assert.h"
#include "libc/bits/bits.h"
#include "libc/calls/calls.h" #include "libc/calls/calls.h"
#include "libc/calls/syscall-sysv.internal.h" #include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h"
#include "libc/errno.h" #include "libc/errno.h"
#include "libc/intrin/kprintf.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/threaded.h" #include "libc/nexgen32e/threaded.h"
#include "libc/nt/thread.h"
#include "libc/nt/thunk/msabi.h"
#include "libc/runtime/internal.h" #include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/nrlinux.h"
#include "libc/thread/thread.h"
#include "third_party/xed/x86.h"
#define __NR_sysarch 0x000000a5 // freebsd+netbsd
#define AMD64_SET_GSBASE 131 // freebsd
#define AMD64_SET_FSBASE 129 // freebsd
#define X86_SET_GSBASE 16 // netbsd
#define X86_SET_FSBASE 17 // netbsd
#define __NR___set_tcb 0x00000149
#define __NR__lwp_setprivate 0x0000013d
#define __NR_thread_fast_set_cthread_self 0x03000003
#define _TLSZ ((intptr_t)_tls_size)
#define _TLDZ ((intptr_t)_tdata_size)
#define _TIBZ sizeof(struct cthread_descriptor_t)
static char tibdefault[64];
extern int __threadcalls_end[]; extern int __threadcalls_end[];
extern int __threadcalls_start[]; extern int __threadcalls_start[];
extern unsigned char __get_tls_nt_rax[];
__msabi extern typeof(TlsAlloc) *const __imp_TlsAlloc;
void __enable_tls(void) { privileged void __enable_tls(void) {
__initialize_tls(tibdefault); assert(!__threaded);
*(int *)((char *)tibdefault + 0x38) = sys_gettid(); assert(!__tls_enabled);
*(int *)((char *)tibdefault + 0x3c) = __errno;
__install_tls(tibdefault); // allocate tls memory for main process
//
// %fs Linux/BSDs
// │
// _Thread_local │ __get_tls()
// ┌───┬──────────┬──────────┼───┐
// │pad│ .tdata │ .tbss │tib│
// └───┴──────────┴──────────┼───┘
// │
// Windows/Mac %gs
//
size_t siz;
cthread_t tib;
char *mem, *tls;
siz = ROUNDUP(_TLSZ + _TIBZ, FRAMESIZE);
mem = _mapanon(siz);
tib = (cthread_t)(mem + siz - _TIBZ);
tls = mem + siz - _TIBZ - _TLSZ;
tib->self = tib;
tib->self2 = tib;
tib->err = __errno;
tib->tid = sys_gettid();
memmove(tls, _tdata_start, _TLDZ);
// ask the operating system to change the x86 segment register
int ax, dx;
if (IsWindows()) {
__tls_index = __imp_TlsAlloc();
asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib));
} else if (IsFreebsd()) {
asm volatile("syscall"
: "=a"(ax)
: "0"(__NR_sysarch), "D"(AMD64_SET_FSBASE), "S"(tib)
: "rcx", "r11", "memory", "cc");
} else if (IsNetbsd()) {
asm volatile("syscall"
: "=a"(ax), "=d"(dx)
: "0"(__NR_sysarch), "D"(X86_SET_FSBASE), "S"(tib)
: "rcx", "r11", "memory", "cc");
} else if (IsXnu()) {
asm volatile("syscall"
: "=a"(ax)
: "0"(__NR_thread_fast_set_cthread_self),
"D"((intptr_t)tib - 0x30)
: "rcx", "r11", "memory", "cc");
} else if (IsOpenbsd()) {
asm volatile("syscall"
: "=a"(ax)
: "0"(__NR___set_tcb), "D"(tib)
: "rcx", "r11", "memory", "cc");
} else {
asm volatile("syscall"
: "=a"(ax)
: "0"(__NR_linux_arch_prctl), "D"(ARCH_SET_FS), "S"(tib)
: "rcx", "r11", "memory");
}
/*
* We need to rewrite SysV _Thread_local code. You MUST use the
* -mno-tls-direct-seg-refs flag which generates code like this
*
* 64 48 8b 0R4 25 00 00 00 00 mov %fs:0,%R
*
* Which on Mac we can replace with this:
*
* 65 48 8b 0R4 25 30 00 00 00 mov %gs:0x30,%R
*
* Whereas on Windows we'll replace it with this:
*
* 0f 1f 40 00 fatnop4
* e8 xx xx xx xx call __get_tls_nt_%R
*
* Since we have no idea where the TLS instructions exist in the
* binary, we need to disassemble the whole program image. This'll
* potentially take a few milliseconds for some larger programs.
*
* TODO(jart): compute probability this is just overkill
*/
if (IsWindows() || IsXnu()) {
int n, reg, dis;
unsigned char *p;
struct XedDecodedInst xedd;
__morph_begin();
// The most expensive part of this process is we need to compute the
// byte length of each instruction in our program. We'll use Intel's
// disassembler for this purpose.
for (p = _ereal; p < __privileged_start; p += n) {
xed_decoded_inst_zero_set_mode(&xedd, XED_MACHINE_MODE_LONG_64);
if (!xed_instruction_length_decode(&xedd, p, 15)) {
// We now know p[0] is most likely the first byte of an x86 op.
// Let's check and see if it's the GCC linear TIB address load.
// We hope and pray GCC won't generate TLS stores to %r8..%r15.
if (xedd.length == 9 && //
0144 == p[0] && // fs
0110 == p[1] && // rex.w (64-bit operand size)
0213 == p[2] && // mov reg/mem → reg (word-sized)
0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg
0045 == p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32
0000 == p[5] && // displacement (von Neumann endian)
0000 == p[6] && // displacement
0000 == p[7] && // displacement
0000 == p[8]) { // displacement
// Apple is quite straightforward to patch. We basically
// just change the segment register, and the linear slot
if (IsXnu()) {
p[0] = 0145; // this changes fs segment to gs segment
p[5] = 0x30; // tib slot index for tib linear address
}
// Windows is kind of complicated. We need to replace the
// segment mov instruction with a function call, that (a)
// won't clobber registers, and (b) has a return register
// that's the same as the mov destination. When setting
// function displacement, &CALL+5+DISP must equal &FUNC.
else {
reg = (p[3] & 070) >> 3;
dis = (__get_tls_nt_rax + reg * 18) - (p + 9);
p[0] = 0017; // map1
p[1] = 0037; // nopl (only if reg=0)
p[2] = 0100; // mod/rm (%rax)+disp8
p[3] = 0000; // displacement
p[4] = 0350; // call
p[5] = (dis & 0x000000ff) >> 000; // displacement
p[6] = (dis & 0x0000ff00) >> 010; // displacement
p[7] = (dis & 0x00ff0000) >> 020; // displacement
p[8] = (dis & 0xff000000) >> 030; // displacement
}
}
// Move to the next instruction.
n = xedd.length;
} else {
// If Xed failed to decode the instruction, then we'll just plow
// through memory one byte at a time until Xed's morale improves
n = 1;
}
}
__morph_end();
}
// we are now allowed to use tls
__tls_enabled = true;
} }
privileged void __enable_threads(void) { privileged void __enable_threads(void) {
assert(!__threaded);
__threaded = gettid(); __threaded = gettid();
__morph_begin();
/* /*
* _NOPL("__threadcalls", func) * _NOPL("__threadcalls", func)
* *
* The big ugly macro above is used by Cosmopolitan Libc to ensure
* locking primitives (e.g. flockfile, funlockfile) have zero impact on
* performance and binary size when threads aren't actually in play.
*
* we have this * we have this
* *
* 0f 1f 05 b1 19 00 00 nopl func(%rip) * 0f 1f 05 b1 19 00 00 nopl func(%rip)
@ -46,8 +223,10 @@ privileged void __enable_threads(void) {
* we're going to turn it into this * we're going to turn it into this
* *
* 67 67 e8 b1 19 00 00 addr32 addr32 call func * 67 67 e8 b1 19 00 00 addr32 addr32 call func
*
* This is cheap and fast because the big ugly macro stored in the
* binary the offsets of all the instructions we need to change.
*/ */
__morph_begin();
for (int *p = __threadcalls_start; p < __threadcalls_end; ++p) { for (int *p = __threadcalls_start; p < __threadcalls_end; ++p) {
_base[*p + 0] = 0x67; _base[*p + 0] = 0x67;
_base[*p + 1] = 0x67; _base[*p + 1] = 0x67;

View file

@ -26,8 +26,9 @@
// runtime facilities. // runtime facilities.
// //
// @param %rdi is arg // @param %rdi is arg
// @param %rsi is func // @param %rsi is tid
// @param %rdx is stack // @param %rdx is func
// @param %rcx is stack
// @return %rax is exit code // @return %rax is exit code
// @see clone() // @see clone()
WinThreadLaunch: WinThreadLaunch:
@ -35,9 +36,9 @@ WinThreadLaunch:
push %r15 push %r15
mov %rbp,%r15 mov %rbp,%r15
mov %rsp,%rbx mov %rsp,%rbx
mov %rdx,%rsp mov %rcx,%rsp
xor %rbp,%rbp xor %rbp,%rbp
call *%rsi call *%rdx
mov %r15,%rbp mov %r15,%rbp
mov %rbx,%rsp mov %rbx,%rsp
pop %r15 pop %r15

View file

@ -32,7 +32,7 @@
* they are passed in the 64kb bytes preceding src. * they are passed in the 64kb bytes preceding src.
* *
* @return pointer to end of decoded data, similar to mempcpy() * @return pointer to end of decoded data, similar to mempcpy()
* @see mapanon(), lz4check() * @see _mapanon(), lz4check()
*/ */
void *lz4decode(void *dest, const void *src) { void *lz4decode(void *dest, const void *src) {
const unsigned char *frame, *block; const unsigned char *frame, *block;

View file

@ -17,6 +17,7 @@
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/nexgen32e/gettls.h" #include "libc/nexgen32e/gettls.h"
#include "libc/nexgen32e/threaded.h"
/** /**
* Returns address of thread information block. * Returns address of thread information block.

View file

@ -40,6 +40,7 @@ LIBC_SYSV_A_FILES := \
libc/sysv/errno_location.greg.c \ libc/sysv/errno_location.greg.c \
libc/sysv/errno.c \ libc/sysv/errno.c \
libc/sysv/gettls.greg.c \ libc/sysv/gettls.greg.c \
libc/sysv/tlspolyfill.S \
libc/sysv/errfun.S \ libc/sysv/errfun.S \
libc/sysv/strace.greg.c \ libc/sysv/strace.greg.c \
libc/sysv/describeos.greg.c \ libc/sysv/describeos.greg.c \

90
libc/sysv/tlspolyfill.S Normal file
View file

@ -0,0 +1,90 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
// Code morphing TLS polyfills for The New Technology.
//
// On Windows, __enable_tls() rewrites each `mov %fs:0,%R` load into
// a `call` whose target is `__get_tls_nt_rax + R * 18`, where R is
// the x86 encoding of the destination register. Each routine below
// must therefore stay exactly 18 bytes long, stay in register
// encoding order, and change no register except its destination.
//
// @note msvc generates this code so it's stable
// @note func ordering follows x86 reg encoding
// @note each function is exactly 18 bytes
// @see __enable_tls()

// Returns the TIB pointer in %rax; polyfill for `mov %fs:0,%rax`.
// Reads entry __tls_index of the %gs:0x1480 array, which is where
// __enable_tls() stored the pointer. %rcx is the scratch register
// here (since %rax is the result) and is saved and restored.
__get_tls_nt_rax:
push %rcx
mov __tls_index(%rip),%ecx
mov %gs:0x1480(,%rcx,8),%rax
pop %rcx
ret
.endfn __get_tls_nt_rax,globl,hidden
// Returns the TIB pointer in %rcx; polyfill for `mov %fs:0,%rcx`.
// Reads entry __tls_index of the %gs:0x1480 array. %rax is used
// as scratch and is saved and restored around the load.
__get_tls_nt_rcx:
push %rax
mov __tls_index(%rip),%eax
mov %gs:0x1480(,%rax,8),%rcx
pop %rax
ret
.endfn __get_tls_nt_rcx
// Returns the TIB pointer in %rdx; polyfill for `mov %fs:0,%rdx`.
// Reads entry __tls_index of the %gs:0x1480 array. %rax is used
// as scratch and is saved and restored around the load.
__get_tls_nt_rdx:
push %rax
mov __tls_index(%rip),%eax
mov %gs:0x1480(,%rax,8),%rdx
pop %rax
ret
.endfn __get_tls_nt_rdx
// Returns the TIB pointer in %rbx; polyfill for `mov %fs:0,%rbx`.
// Reads entry __tls_index of the %gs:0x1480 array. %rax is used
// as scratch and is saved and restored around the load.
__get_tls_nt_rbx:
push %rax
mov __tls_index(%rip),%eax
mov %gs:0x1480(,%rax,8),%rbx
pop %rax
ret
.endfn __get_tls_nt_rbx
// Table entry for destination register %rsp (x86 encoding 4).
//
// NOTE(review): once %rsp has been overwritten by the load, the
// `pop` and `ret` that follow read from the new stack rather than
// the caller's, so this entry doesn't look usable as a real call
// target. Presumably it exists only to keep the 18-byte spacing of
// the table intact for the entries that follow -- confirm.
__get_tls_nt_rsp:
push %rax
mov __tls_index(%rip),%eax
mov %gs:0x1480(,%rax,8),%rsp
pop %rax
ret
.endfn __get_tls_nt_rsp
// Returns the TIB pointer in %rbp; polyfill for `mov %fs:0,%rbp`.
// Reads entry __tls_index of the %gs:0x1480 array. %rax is used
// as scratch and is saved and restored around the load.
__get_tls_nt_rbp:
push %rax
mov __tls_index(%rip),%eax
mov %gs:0x1480(,%rax,8),%rbp
pop %rax
ret
.endfn __get_tls_nt_rbp
// Returns the TIB pointer in %rsi; polyfill for `mov %fs:0,%rsi`.
// Reads entry __tls_index of the %gs:0x1480 array. %rax is used
// as scratch and is saved and restored around the load.
__get_tls_nt_rsi:
push %rax
mov __tls_index(%rip),%eax
mov %gs:0x1480(,%rax,8),%rsi
pop %rax
ret
.endfn __get_tls_nt_rsi
// Returns the TIB pointer in %rdi; polyfill for `mov %fs:0,%rdi`.
// Reads entry __tls_index of the %gs:0x1480 array. %rax is used
// as scratch and is saved and restored around the load.
__get_tls_nt_rdi:
push %rax
mov __tls_index(%rip),%eax
mov %gs:0x1480(,%rax,8),%rdi
pop %rax
ret
.endfn __get_tls_nt_rdi

View file

@ -1,133 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/atomic.h"
#include "libc/calls/calls.h"
#include "libc/calls/strace.internal.h"
#include "libc/errno.h"
#include "libc/intrin/setjmp.internal.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clone.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/internal.h"
#include "libc/thread/thread.h"
STATIC_YOINK("_main_thread_ctor");
static cthread_t cthread_allocate(const cthread_attr_t *attr) {
char *mem;
size_t size;
cthread_t td;
size = ROUNDUP(
attr->stacksize +
ROUNDUP((uintptr_t)_tls_size + sizeof(struct cthread_descriptor_t),
PAGESIZE),
FRAMESIZE);
mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_STACK | MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) return 0;
if (attr->guardsize > PAGESIZE) {
mprotect(mem, attr->guardsize, PROT_NONE);
}
td = (cthread_t)(mem + size - sizeof(struct cthread_descriptor_t));
td->self = td;
td->self2 = td;
td->err = errno;
td->tid = -1;
td->stack.bottom = mem;
td->stack.top = mem + attr->stacksize;
td->alloc.bottom = mem;
td->alloc.top = mem + size;
if (attr->mode & CTHREAD_CREATE_DETACHED) {
td->state = cthread_detached;
} else {
td->state = cthread_started;
}
// Initialize TLS with content of .tdata section
memmove((void *)((intptr_t)td - (intptr_t)_tls_size), _tdata_start,
(intptr_t)_tdata_size);
return td;
}
/**
 * Runs on the new thread as the clone() callback.
 *
 * Invokes the user function stored in the descriptor, records its
 * result, runs the key destructors, and finally marks the thread as
 * finished.
 *
 * @param arg is the thread's own descriptor
 * @return always 0
 */
static int cthread_start(void *arg) {
axdx_t rc;
void *exitcode;
cthread_t td = arg;
// ax is zero on the direct return from setlongerjmp(); otherwise we
// got here through td->exiter and dx carries the exit value
// (presumably supplied by the thread-exit routine -- confirm)
if (!(rc = setlongerjmp(td->exiter)).ax) {
exitcode = td->func(td->arg);
} else {
exitcode = (void *)rc.dx;
}
td->exitcode = exitcode;
_pthread_key_destruct(td->key);
if (atomic_load(&td->state) & cthread_detached) {
// we're still using the stack
// thus we can't munmap it yet
// kick the can down the road!
cthread_zombies_add(td);
}
// publish completion last, after exitcode has been stored
atomic_fetch_add(&td->state, cthread_finished);
return 0;
}
/**
 * Creates thread.
 *
 * The stack, TLS block, and descriptor are obtained from
 * cthread_allocate(), after which the thread is launched with clone()
 * using the descriptor as both the callback argument and the TLS
 * address. The kernel is asked to store the tid in `td->tid` and to
 * clear it again when the thread terminates.
 *
 * @param ptd will receive pointer to new thread descriptor; it's only
 *     written on success
 * @param attr contains special configuration if non-null
 * @param func is thread callback function
 * @param arg is argument supplied to `func`
 * @return 0 on success, or error number on failure
 * @threadsafe
 */
int cthread_create(cthread_t *ptd, const cthread_attr_t *attr,
                   void *(*func)(void *), void *arg) {
  int rc, tid;
  cthread_t td;
  cthread_attr_t default_attr;
  __threaded = true;
  cthread_zombies_reap();
  cthread_attr_init(&default_attr);
  td = cthread_allocate(attr ? attr : &default_attr);
  // save the failure reason before anything else can touch errno
  rc = td ? 0 : errno;
  // always pair init with destroy, including when allocation failed
  cthread_attr_destroy(&default_attr);
  if (td) {
    td->func = func;
    td->arg = arg;
    tid =
        clone(cthread_start, td->stack.bottom, td->stack.top - td->stack.bottom,
              CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
                  CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
              td, 0, td, sizeof(struct cthread_descriptor_t), &td->tid);
    if (tid != -1) {
      *ptd = td;
    } else {
      rc = errno;
      // the thread never started, so nobody else owns this mapping
      munmap(td->alloc.bottom, td->alloc.top - td->alloc.bottom);
    }
  } else {
    tid = -1;
  }
  STRACE("cthread_create([%d], %p, %p, %p) → %s", tid, attr, func, arg,
         !rc ? "0" : strerrno(rc));
  return rc;
}

View file

@ -21,7 +21,7 @@
.init.start 400,_main_thread_ctor .init.start 400,_main_thread_ctor
push %rdi push %rdi
push %rsi push %rsi
call _main_thread_init call __enable_tls
pop %rsi pop %rsi
pop %rdi pop %rdi
.init.end 400,_main_thread_ctor .init.end 400,_main_thread_ctor

View file

@ -1,72 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/atomic.h"
#include "libc/calls/calls.h"
#include "libc/calls/strace.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/asan.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/futex.h"
#include "libc/sysv/consts/nr.h"
#include "libc/thread/thread.h"
/**
 * Waits for thread to terminate and frees its memory.
 *
 * On success the whole mapping recorded in `td->alloc` is unmapped,
 * so `td` must not be touched again afterwards.
 *
 * @param td is thread descriptor memory
 * @param exitcode optionally receives value returned by thread
 * @return 0 on success, or error number on failure
 * @raises EDEADLK when trying to join this thread
 * @raises EINVAL if another thread is joining
 * @raises ESRCH if no such thread exists
 * @raises EINVAL if not joinable
 * @threadsafe
 */
int cthread_join(cthread_t td, void **exitcode) {
int x, rc, tid;
// reject a null descriptor, or under ASAN one that isn't addressable
if (!td || (IsAsan() && !__asan_is_valid(td, sizeof(*td)))) {
rc = ESRCH;
tid = -1;
} else if ((tid = td->tid) == gettid()) { // tid must load before lock xadd
rc = EDEADLK;
} else if (atomic_load(&td->state) & (cthread_detached | cthread_joining)) {
rc = EINVAL;
} else {
// mark the thread as joining; the value fetched is the state prior
// to the add, so we only need to wait if it hadn't finished yet
if (~atomic_fetch_add(&td->state, cthread_joining) & cthread_finished) {
// cthread_create() passes &td->tid with CLONE_CHILD_CLEARTID, so
// tid reads as zero once the thread has terminated
while ((x = atomic_load(&td->tid))) {
cthread_memory_wait32(&td->tid, x, 0);
}
}
if (exitcode) {
*exitcode = td->exitcode;
}
// releases stack, tls, and the descriptor itself in one go
if (!munmap(td->alloc.bottom, td->alloc.top - td->alloc.bottom)) {
rc = 0;
} else {
rc = errno;
}
}
STRACE("cthread_join(%d, [%p]) → %s", tid, !rc && exitcode ? *exitcode : 0,
!rc ? "0" : strerrno(rc));
return rc;
}

View file

@ -1,7 +1,7 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the any purpose with or without fee is hereby granted, provided that the
@ -16,53 +16,41 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/errno.h"
#include "libc/macros.internal.h" #include "libc/macros.internal.h"
#include "libc/nexgen32e/threaded.h" #include "libc/mem/mem.h"
#include "libc/runtime/internal.h" #include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/sysv/consts/map.h" #include "libc/thread/spawn.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
textstartup void _main_thread_init(void) { #define _TLSZ ((intptr_t)_tls_size)
_Static_assert(offsetof(struct cthread_descriptor_t, self) == 0x00, ""); #define _TLDZ ((intptr_t)_tdata_size)
_Static_assert(offsetof(struct cthread_descriptor_t, self2) == 0x30, ""); #define _TIBZ sizeof(struct cthread_descriptor_t)
_Static_assert(offsetof(struct cthread_descriptor_t, tid) == 0x38, ""); #define _MEMZ ROUNDUP(_TLSZ + _TIBZ, alignof(struct cthread_descriptor_t))
_Static_assert(offsetof(struct cthread_descriptor_t, err) == 0x3c, "");
cthread_t td;
size_t totalsize;
char *mem, *bottom, *top;
totalsize = ROUNDUP( /**
(uintptr_t)_tls_size + sizeof(struct cthread_descriptor_t), FRAMESIZE); * Allocates thread-local storage memory for new thread.
* @return buffer that must be released with free()
*/
char *_mktls(char **out_tib) {
char *tls;
cthread_t tib;
mem = mmap(0, totalsize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, // Allocate enough TLS memory for all the GNU Linuker (_tls_size)
-1, 0); // organized _Thread_local data, as well as Cosmpolitan Libc (64)
assert(mem != MAP_FAILED); if (!(tls = calloc(1, _MEMZ))) return 0;
bottom = mem; // set up thread informaiton block
top = mem + totalsize; tib = (cthread_t)(tls + _MEMZ - _TIBZ);
tib->self = tib;
tib->self2 = tib;
tib->err = 0;
tib->tid = -1;
memmove(tls, _tdata_start, _TLDZ);
td = (cthread_t)(top - sizeof(struct cthread_descriptor_t)); if (out_tib) {
td->self = td; *out_tib = (char *)tib;
td->self2 = td; }
td->err = errno; return tls;
td->tid = gettid();
td->alloc.bottom = bottom;
td->alloc.top = top;
td->stack.bottom = GetStackAddr(0);
td->stack.top = td->stack.bottom + GetStackSize();
td->state = cthread_main;
// Initialize TLS with content of .tdata section
memmove((void *)((uintptr_t)td - (uintptr_t)_tls_size), _tdata_start,
(uintptr_t)_tdata_size);
// Set FS
__install_tls((char *)td);
} }

113
libc/thread/spawn.c Normal file
View file

@ -0,0 +1,113 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/wait0.internal.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clone.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/spawn.h"
#include "libc/thread/thread.h"
STATIC_YOINK("_main_thread_ctor");
/**
* @fileoverview Simple System Threads API
*/
// size of the _Thread_local image, taken from the _tls_size symbol
#define _TLSZ ((intptr_t)_tls_size)
// size of the initialized thread-local data, taken from _tdata_size
#define _TLDZ ((intptr_t)_tdata_size)
// size of the thread information block (struct cthread_descriptor_t);
// this is the tls size argument that _spawn() hands to clone()
#define _TIBZ sizeof(struct cthread_descriptor_t)
// _TLSZ plus _TIBZ, rounded up to the alignment of the descriptor
#define _MEMZ ROUNDUP(_TLSZ + _TIBZ, alignof(struct cthread_descriptor_t))
/**
* Spawns thread.
*
* @param fun is thread worker callback, which receives `arg` and `ctid`
* @param arg shall be passed to `fun`
* @param opt_out_thread needn't be initialiized and is always clobbered
* except when it isn't specified, in which case, the thread is kind
* of detached and will leak in stack / tls memory
* @return 0 on success, or -1 w/ errno
*/
int _spawn(int fun(void *, int), void *arg, struct spawn *opt_out_thread) {
struct spawn *th, ths;
// we need to to clobber the output memory before calling clone, since
// there's no guarantee clone() won't suspend the parent, and focus on
// running the child instead; in that case child might want to read it
if (opt_out_thread) {
th = opt_out_thread;
} else {
th = &ths;
}
// Allocate enough TLS memory for all the GNU Linuker (_tls_size)
// organized _Thread_local data, as well as Cosmpolitan Libc (64)
if (!(th->tls = _mktls(&th->tib))) {
return -1;
}
th->ctid = (int *)(th->tib + 0x38);
// We must use _mapstack() to allocate the stack because OpenBSD has
// very strict requirements for what's allowed to be used for stacks
if (!(th->stk = _mapstack())) {
free(th->tls);
return -1;
}
if (clone(fun, th->stk, GetStackSize(),
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID,
arg, &th->ptid, th->tib, _TIBZ, th->ctid) == -1) {
_freestack(th->stk);
free(th->tls);
return -1;
}
return 0;
}
/**
 * Blocks until a thread started by _spawn() has terminated.
 *
 * Once the thread is gone its tls buffer is freed and its stack is
 * unmapped. The handle is zeroed before returning, so joining the same
 * handle again (or joining a zeroed handle) does nothing and returns 0.
 *
 * @param th is the handle that _spawn() filled in
 * @return result of unmapping the stack, or 0 if nothing was joined
 */
int _join(struct spawn *th) {
  int rc = 0;
  if (th->ctid) {
    _wait0(th->ctid);  // returns once *ctid has become zero
    free(th->tls);     // release thread-local storage
    rc = munmap(th->stk, GetStackSize());
  }
  bzero(th, sizeof(*th));
  return rc;
}

20
libc/thread/spawn.h Normal file
View file

@ -0,0 +1,20 @@
#ifndef COSMOPOLITAN_LIBC_THREAD_SPAWN_H_
#define COSMOPOLITAN_LIBC_THREAD_SPAWN_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
// Handle for a thread created by _spawn() and reaped by _join().
// A handle whose ctid is null is treated by _join() as nothing to join.
struct spawn {
int ptid;   // _spawn() passes its address to clone() with CLONE_PARENT_SETTID
int *ctid;  // tib + 0x38; _join() waits for the pointed-to value to hit zero
char *stk;  // stack obtained from _mapstack(); unmapped by _join()
char *tls;  // buffer returned by _mktls(); released with free() by _join()
char *tib;  // thread information block address reported by _mktls()
};
// starts a thread running the callback; returns 0, or -1 w/ errno
int _spawn(int (*)(void *, int), void *, struct spawn *) hidden;
// waits for the thread to terminate, then frees its stack and tls
int _join(struct spawn *) hidden;
// allocates tls memory for a new thread; result is released with free()
char *_mktls(char **) hidden;
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_THREAD_SPAWN_H_ */

View file

@ -15,7 +15,6 @@ enum cthread_state {
cthread_joining = 1, cthread_joining = 1,
cthread_finished = 2, cthread_finished = 2,
cthread_detached = 4, cthread_detached = 4,
cthread_main = 127,
}; };
struct cthread_descriptor_t { struct cthread_descriptor_t {

View file

@ -40,6 +40,7 @@
#include "libc/sysv/consts/sig.h" #include "libc/sysv/consts/sig.h"
#include "libc/testlib/hyperion.h" #include "libc/testlib/hyperion.h"
#include "libc/testlib/testlib.h" #include "libc/testlib/testlib.h"
#include "libc/thread/spawn.h"
#include "libc/time/time.h" #include "libc/time/time.h"
STATIC_YOINK("zip_uri_support"); STATIC_YOINK("zip_uri_support");
@ -51,9 +52,6 @@ void PullSomeZipFilesIntoLinkage(void) {
gmtime(0); gmtime(0);
} }
char *stack[THREADS];
char tls[THREADS][64];
TEST(reservefd, testGrowthOfFdsDataStructure) { TEST(reservefd, testGrowthOfFdsDataStructure) {
int i, n; int i, n;
struct rlimit rlim; struct rlimit rlim;
@ -87,7 +85,7 @@ void OnSigAlrm(int sig, siginfo_t *si, ucontext_t *ctx) {
close(fd); // can eintr which doesn't matter close(fd); // can eintr which doesn't matter
} }
int Worker(void *p) { int Worker(void *p, int tid) {
char buf[64]; char buf[64];
int i, rc, fd; int i, rc, fd;
for (i = 0; i < 64; ++i) { for (i = 0; i < 64; ++i) {
@ -111,6 +109,7 @@ int Worker(void *p) {
TEST(reservefd, tortureTest) { TEST(reservefd, tortureTest) {
int i; int i;
struct spawn th[THREADS];
struct sigaction oldsa; struct sigaction oldsa;
struct itimerval oldit; struct itimerval oldit;
struct itimerval it = {{0, 10000}, {0, 100}}; struct itimerval it = {{0, 10000}, {0, 100}};
@ -119,17 +118,10 @@ TEST(reservefd, tortureTest) {
// ASSERT_SYS(0, 0, sigaction(SIGALRM, &sa, &oldsa)); // ASSERT_SYS(0, 0, sigaction(SIGALRM, &sa, &oldsa));
// ASSERT_SYS(0, 0, setitimer(ITIMER_REAL, &it, &oldit)); // ASSERT_SYS(0, 0, setitimer(ITIMER_REAL, &it, &oldit));
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
clone(Worker, _spawn(Worker, 0, th + i);
(stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)),
GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS,
0, 0, __initialize_tls(tls[i]), sizeof(tls[i]),
(int *)(tls[i] + 0x38));
} }
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
_wait0((int *)(tls[i] + 0x38)); _join(th + i);
} }
// EXPECT_SYS(0, 0, sigaction(SIGALRM, &oldsa, 0)); // EXPECT_SYS(0, 0, sigaction(SIGALRM, &oldsa, 0));
// EXPECT_SYS(0, 0, setitimer(ITIMER_REAL, &oldit, 0)); // EXPECT_SYS(0, 0, setitimer(ITIMER_REAL, &oldit, 0));

View file

@ -42,6 +42,7 @@ TEST_LIBC_CALLS_DIRECTDEPS = \
LIBC_STR \ LIBC_STR \
LIBC_STUBS \ LIBC_STUBS \
LIBC_SYSV \ LIBC_SYSV \
LIBC_THREAD \
LIBC_TIME \ LIBC_TIME \
LIBC_TESTLIB \ LIBC_TESTLIB \
LIBC_UNICODE \ LIBC_UNICODE \

View file

@ -43,7 +43,8 @@
*/ */
static uint64_t Rando(void) { static uint64_t Rando(void) {
uint64_t x; uint64_t x;
do x = lemur64(); do
x = lemur64();
while (((x ^ READ64LE("!!!!!!!!")) - 0x0101010101010101) & while (((x ^ READ64LE("!!!!!!!!")) - 0x0101010101010101) &
~(x ^ READ64LE("!!!!!!!!")) & 0x8080808080808080); ~(x ^ READ64LE("!!!!!!!!")) & 0x8080808080808080);
return x; return x;
@ -279,7 +280,7 @@ TEST(ksnprintf, testMisalignedPointer_wontFormat) {
TEST(ksnprintf, testUnterminatedOverrun_truncatesAtPageBoundary) { TEST(ksnprintf, testUnterminatedOverrun_truncatesAtPageBoundary) {
char *m; char *m;
char b[32]; char b[32];
m = memset(mapanon(FRAMESIZE * 2), 1, FRAMESIZE); m = memset(_mapanon(FRAMESIZE * 2), 1, FRAMESIZE);
EXPECT_SYS(0, 0, munmap(m + FRAMESIZE, FRAMESIZE)); EXPECT_SYS(0, 0, munmap(m + FRAMESIZE, FRAMESIZE));
EXPECT_EQ(12, ksnprintf(b, 32, "%'s", m + FRAMESIZE - 3)); EXPECT_EQ(12, ksnprintf(b, 32, "%'s", m + FRAMESIZE - 3));
EXPECT_STREQ("\\001\\001\\001", b); EXPECT_STREQ("\\001\\001\\001", b);

View file

@ -41,32 +41,18 @@
#include "libc/sysv/consts/rlimit.h" #include "libc/sysv/consts/rlimit.h"
#include "libc/testlib/ezbench.h" #include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h" #include "libc/testlib/testlib.h"
#include "libc/thread/spawn.h"
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
#define THREADS 8 #define THREADS 8
#define ITERATIONS 512 #define ITERATIONS 512
#define TLS_SIZE PAGESIZE
char *tls[THREADS];
char *stack[THREADS];
_Alignas(PAGESIZE) char tlsdata[THREADS * 3][TLS_SIZE];
int count; int count;
_Atomic(int) started; _Atomic(int) started;
_Atomic(int) finished; _Atomic(int) finished;
_Alignas(64) char slock; _Alignas(64) char slock;
pthread_mutex_t mylock; pthread_mutex_t mylock;
struct spawn th[THREADS];
__attribute__((__constructor__)) void init(void) {
int i;
__enable_tls();
__enable_threads();
for (i = 0; i < THREADS; ++i) {
CHECK_NE(-1, mprotect(tlsdata[i * 3 + 0], TLS_SIZE, PROT_NONE));
tls[i] = tlsdata[i * 3 + 1];
CHECK_NE(-1, mprotect(tlsdata[i * 3 + 2], TLS_SIZE, PROT_NONE));
}
}
TEST(pthread_mutex_lock, normal) { TEST(pthread_mutex_lock, normal) {
pthread_mutex_t lock; pthread_mutex_t lock;
@ -116,7 +102,7 @@ TEST(pthread_mutex_lock, errorcheck) {
__assert_disable = false; __assert_disable = false;
} }
int MutexWorker(void *p) { int MutexWorker(void *p, int tid) {
int i; int i;
++started; ++started;
for (i = 0; i < ITERATIONS; ++i) { for (i = 0; i < ITERATIONS; ++i) {
@ -124,7 +110,6 @@ int MutexWorker(void *p) {
++count; ++count;
pthread_mutex_unlock(&mylock); pthread_mutex_unlock(&mylock);
} }
ASSERT_NE(0, (int *)(tls[(intptr_t)p] + 0x38));
++finished; ++finished;
return 0; return 0;
} }
@ -140,29 +125,14 @@ TEST(pthread_mutex_lock, contention) {
started = 0; started = 0;
finished = 0; finished = 0;
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
ASSERT_NE(MAP_FAILED, ASSERT_SYS(0, 0, _spawn(MutexWorker, (void *)(intptr_t)i, th + i));
(stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)));
ASSERT_NE(-1, clone(MutexWorker, stack[i], GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID | CLONE_SETTLS,
(void *)(intptr_t)i, 0, __initialize_tls(tls[i]),
TLS_SIZE, (int *)(tls[i] + 0x38)));
} }
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
_wait0((int *)(tls[i] + 0x38)); ASSERT_SYS(0, 0, _join(th + i));
ASSERT_EQ(0, *(int *)(tls[i] + 0x38));
}
for (i = 0; i < THREADS; ++i) {
ASSERT_EQ(0, *(int *)(tls[i] + 0x38));
} }
EXPECT_EQ(THREADS, started); EXPECT_EQ(THREADS, started);
EXPECT_EQ(THREADS, finished); EXPECT_EQ(THREADS, finished);
EXPECT_EQ(THREADS * ITERATIONS, count); EXPECT_EQ(THREADS * ITERATIONS, count);
for (i = 0; i < THREADS; ++i) {
ASSERT_SYS(0, 0, munmap(stack[i], GetStackSize()));
}
EXPECT_EQ(0, pthread_mutex_destroy(&mylock)); EXPECT_EQ(0, pthread_mutex_destroy(&mylock));
} }
@ -177,29 +147,14 @@ TEST(pthread_mutex_lock, rcontention) {
started = 0; started = 0;
finished = 0; finished = 0;
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
ASSERT_NE(MAP_FAILED, ASSERT_NE(-1, _spawn(MutexWorker, (void *)(intptr_t)i, th + i));
(stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)));
ASSERT_NE(-1, clone(MutexWorker, stack[i], GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID | CLONE_SETTLS,
(void *)(intptr_t)i, 0, __initialize_tls(tls[i]),
TLS_SIZE, (int *)(tls[i] + 0x38)));
} }
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
_wait0((int *)(tls[i] + 0x38)); _join(th + i);
ASSERT_EQ(0, *(int *)(tls[i] + 0x38));
}
for (i = 0; i < THREADS; ++i) {
ASSERT_EQ(0, *(int *)(tls[i] + 0x38));
} }
EXPECT_EQ(THREADS, started); EXPECT_EQ(THREADS, started);
EXPECT_EQ(THREADS, finished); EXPECT_EQ(THREADS, finished);
EXPECT_EQ(THREADS * ITERATIONS, count); EXPECT_EQ(THREADS * ITERATIONS, count);
for (i = 0; i < THREADS; ++i) {
ASSERT_SYS(0, 0, munmap(stack[i], GetStackSize()));
}
EXPECT_EQ(0, pthread_mutex_destroy(&mylock)); EXPECT_EQ(0, pthread_mutex_destroy(&mylock));
} }
@ -214,33 +169,18 @@ TEST(pthread_mutex_lock, econtention) {
started = 0; started = 0;
finished = 0; finished = 0;
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
ASSERT_NE(MAP_FAILED, ASSERT_NE(-1, _spawn(MutexWorker, (void *)(intptr_t)i, th + i));
(stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)));
ASSERT_NE(-1, clone(MutexWorker, stack[i], GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID | CLONE_SETTLS,
(void *)(intptr_t)i, 0, __initialize_tls(tls[i]),
TLS_SIZE, (int *)(tls[i] + 0x38)));
} }
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
_wait0((int *)(tls[i] + 0x38)); _join(th + i);
ASSERT_EQ(0, *(int *)(tls[i] + 0x38));
}
for (i = 0; i < THREADS; ++i) {
ASSERT_EQ(0, *(int *)(tls[i] + 0x38));
} }
EXPECT_EQ(THREADS, started); EXPECT_EQ(THREADS, started);
EXPECT_EQ(THREADS, finished); EXPECT_EQ(THREADS, finished);
EXPECT_EQ(THREADS * ITERATIONS, count); EXPECT_EQ(THREADS * ITERATIONS, count);
for (i = 0; i < THREADS; ++i) {
ASSERT_SYS(0, 0, munmap(stack[i], GetStackSize()));
}
EXPECT_EQ(0, pthread_mutex_destroy(&mylock)); EXPECT_EQ(0, pthread_mutex_destroy(&mylock));
} }
int SpinlockWorker(void *p) { int SpinlockWorker(void *p, int tid) {
int i; int i;
++started; ++started;
for (i = 0; i < ITERATIONS; ++i) { for (i = 0; i < ITERATIONS; ++i) {
@ -248,7 +188,6 @@ int SpinlockWorker(void *p) {
++count; ++count;
_spunlock(&slock); _spunlock(&slock);
} }
ASSERT_NE(0, (int *)(tls[(intptr_t)p] + 0x38));
++finished; ++finished;
return 0; return 0;
} }
@ -259,25 +198,14 @@ TEST(_spinlock, contention) {
started = 0; started = 0;
finished = 0; finished = 0;
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
ASSERT_NE(MAP_FAILED, ASSERT_NE(-1, _spawn(SpinlockWorker, (void *)(intptr_t)i, th + i));
(stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)));
ASSERT_NE(-1, clone(SpinlockWorker, stack[i], GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID | CLONE_SETTLS,
(void *)(intptr_t)i, 0, __initialize_tls(tls[i]),
TLS_SIZE, (int *)(tls[i] + 0x38)));
} }
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
_wait0((int *)(tls[i] + 0x38)); _join(th + i);
} }
EXPECT_EQ(THREADS, started); EXPECT_EQ(THREADS, started);
EXPECT_EQ(THREADS, finished); EXPECT_EQ(THREADS, finished);
EXPECT_EQ(THREADS * ITERATIONS, count); EXPECT_EQ(THREADS * ITERATIONS, count);
for (i = 0; i < THREADS; ++i) {
ASSERT_SYS(0, 0, munmap(stack[i], GetStackSize()));
}
} }
BENCH(pthread_mutex_lock, bench) { BENCH(pthread_mutex_lock, bench) {

View file

@ -35,6 +35,7 @@ TEST_LIBC_INTRIN_DIRECTDEPS = \
LIBC_STR \ LIBC_STR \
LIBC_STUBS \ LIBC_STUBS \
LIBC_SYSV \ LIBC_SYSV \
LIBC_THREAD \
LIBC_TESTLIB \ LIBC_TESTLIB \
LIBC_TINYMATH \ LIBC_TINYMATH \
LIBC_UNICODE \ LIBC_UNICODE \

View file

@ -78,7 +78,7 @@ TEST(lz4, zoneFileGmt) {
size_t mapsize, gmtsize; size_t mapsize, gmtsize;
char *mapping, *gmtdata; char *mapping, *gmtdata;
lz4decode((gmtdata = lz4decode( lz4decode((gmtdata = lz4decode(
(mapping = mapanon( (mapping = _mapanon(
(mapsize = roundup( (mapsize = roundup(
LZ4_FRAME_BLOCKCONTENTSIZE(lz4check(dict.addr)) + LZ4_FRAME_BLOCKCONTENTSIZE(lz4check(dict.addr)) +
(gmtsize = LZ4_FRAME_BLOCKCONTENTSIZE( (gmtsize = LZ4_FRAME_BLOCKCONTENTSIZE(

View file

@ -37,6 +37,7 @@
#include "libc/sysv/consts/sa.h" #include "libc/sysv/consts/sa.h"
#include "libc/sysv/consts/sig.h" #include "libc/sysv/consts/sig.h"
#include "libc/testlib/testlib.h" #include "libc/testlib/testlib.h"
#include "libc/thread/spawn.h"
#include "libc/thread/thread.h" #include "libc/thread/thread.h"
#include "libc/time/time.h" #include "libc/time/time.h"
@ -54,7 +55,7 @@ dontinline void Generate(int i) {
A[i] = rand64(); A[i] = rand64();
} }
int Thrasher(void *arg) { int Thrasher(void *arg, int tid) {
int i, id = (intptr_t)arg; int i, id = (intptr_t)arg;
while (!atomic_load(&ready)) { while (!atomic_load(&ready)) {
cthread_memory_wait32(&ready, 0, 0); cthread_memory_wait32(&ready, 0, 0);
@ -83,9 +84,8 @@ TEST(rand64, testLcg_doesntProduceIdenticalValues) {
TEST(rand64, testThreadSafety_doesntProduceIdenticalValues) { TEST(rand64, testThreadSafety_doesntProduceIdenticalValues) {
int i, j, rc, ws; int i, j, rc, ws;
sigset_t ss, oldss; sigset_t ss, oldss;
char *tls[THREADS];
void *stacks[THREADS];
struct sigaction oldsa; struct sigaction oldsa;
struct spawn th[THREADS];
struct sigaction sa = {.sa_handler = OnChld, .sa_flags = SA_RESTART}; struct sigaction sa = {.sa_handler = OnChld, .sa_flags = SA_RESTART};
EXPECT_NE(-1, sigaction(SIGCHLD, &sa, &oldsa)); EXPECT_NE(-1, sigaction(SIGCHLD, &sa, &oldsa));
bzero(A, sizeof(A)); bzero(A, sizeof(A));
@ -94,25 +94,12 @@ TEST(rand64, testThreadSafety_doesntProduceIdenticalValues) {
EXPECT_EQ(0, sigprocmask(SIG_BLOCK, &ss, &oldss)); EXPECT_EQ(0, sigprocmask(SIG_BLOCK, &ss, &oldss));
ready = false; ready = false;
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
tls[i] = __initialize_tls(calloc(1, 64)); ASSERT_SYS(0, 0, _spawn(Thrasher, (void *)(intptr_t)i, th + i));
stacks[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(
-1,
clone(Thrasher, stacks[i], GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
(void *)(intptr_t)i, 0, tls[i], 64, (int *)(tls[i] + 0x38)));
} }
atomic_store(&ready, 1); atomic_store(&ready, 1);
cthread_memory_wake32(&ready, INT_MAX); cthread_memory_wake32(&ready, INT_MAX);
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
while ((j = atomic_load((uint32_t *)(tls[i] + 0x38)))) { ASSERT_SYS(0, 0, _join(th + i));
// FUTEX_WAIT_PRIVATE makes it hang
cthread_memory_wait32((int *)(tls[i] + 0x38), j, 0);
}
EXPECT_SYS(0, 0, munmap(stacks[i], GetStackSize()));
free(tls[i]);
} }
sigaction(SIGCHLD, &oldsa, 0); sigaction(SIGCHLD, &oldsa, 0);
sigprocmask(SIG_BLOCK, &oldss, 0); sigprocmask(SIG_BLOCK, &oldss, 0);

View file

@ -23,9 +23,11 @@
#include "libc/intrin/spinlock.h" #include "libc/intrin/spinlock.h"
#include "libc/intrin/wait0.internal.h" #include "libc/intrin/wait0.internal.h"
#include "libc/log/backtrace.internal.h" #include "libc/log/backtrace.internal.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h" #include "libc/mem/mem.h"
#include "libc/nexgen32e/gettls.h" #include "libc/nexgen32e/gettls.h"
#include "libc/nexgen32e/nexgen32e.h" #include "libc/nexgen32e/nexgen32e.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h" #include "libc/runtime/stack.h"
#include "libc/runtime/symbols.internal.h" #include "libc/runtime/symbols.internal.h"
@ -36,10 +38,10 @@
#include "libc/sysv/consts/sig.h" #include "libc/sysv/consts/sig.h"
#include "libc/testlib/ezbench.h" #include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h" #include "libc/testlib/testlib.h"
#include "libc/thread/spawn.h"
#include "libc/time/time.h" #include "libc/time/time.h"
char *stack, *tls; int x, me, tid;
int x, me, tid, *childetid;
_Atomic(int) thechilde; _Atomic(int) thechilde;
__attribute__((__constructor__)) static void init(void) { __attribute__((__constructor__)) static void init(void) {
@ -47,47 +49,38 @@ __attribute__((__constructor__)) static void init(void) {
errno = 0; errno = 0;
} }
void *__initialize_tls(char tib[64]) {
if (tib) {
*(intptr_t *)(tib + 0x00) = (intptr_t)tib;
*(intptr_t *)(tib + 0x30) = (intptr_t)tib;
*(int *)(tib + 0x38) = -1; // tid
*(int *)(tib + 0x3c) = 0;
}
return tib;
}
void SetUp(void) { void SetUp(void) {
x = 0; x = 0;
me = gettid(); me = gettid();
tls = calloc(1, 64);
__initialize_tls(tls);
*(int *)(tls + 0x3c) = 31337;
childetid = (int *)(tls + 0x38);
ASSERT_NE(MAP_FAILED, (stack = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)));
} }
void TearDown(void) { void TearDown(void) {
EXPECT_SYS(0, 0, munmap(stack, GetStackSize()));
free(tls);
} }
int DoNothing(void *arg) { int DoNothing(void *arg) {
return 0; return 0;
} }
////////////////////////////////////////////////////////////////////////////////
// TEST ERROR NUMBERS
TEST(clone, testNullFunc_raisesEinval) {
EXPECT_SYS(EINVAL, -1,
clone(0, stack, GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND | CLONE_SETTLS,
0, 0, tls, 64, 0));
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// TEST THREADS WORK // TEST THREADS WORK
int CloneTest1(void *arg) { int CloneTest1(void *arg, int tid) {
intptr_t rsp, top, bot; intptr_t rsp, top, bot;
CheckStackIsAligned(); CheckStackIsAligned();
// PrintBacktraceUsingSymbols(2, __builtin_frame_address(0), // PrintBacktraceUsingSymbols(2, __builtin_frame_address(0),
// GetSymbolTable()); // GetSymbolTable());
rsp = (intptr_t)__builtin_frame_address(0); rsp = (intptr_t)__builtin_frame_address(0);
bot = (intptr_t)stack; bot = ROUNDDOWN((intptr_t)rsp, GetStackSize());
top = bot + GetStackSize(); top = bot + GetStackSize();
ASSERT_GT(rsp, bot); // check we're on stack ASSERT_GT(rsp, bot); // check we're on stack
ASSERT_LT(rsp, top); // check we're on stack ASSERT_LT(rsp, top); // check we're on stack
@ -95,28 +88,16 @@ int CloneTest1(void *arg) {
ASSERT_TRUE(IS2POW(GetStackSize())); ASSERT_TRUE(IS2POW(GetStackSize()));
ASSERT_EQ(0, bot & (GetStackSize() - 1)); ASSERT_EQ(0, bot & (GetStackSize() - 1));
x = 42; x = 42;
if (!IsWindows()) {
ASSERT_EQ(31337, errno);
} else {
errno = 31337;
ASSERT_EQ(31337, errno);
}
ASSERT_EQ(23, (intptr_t)arg); ASSERT_EQ(23, (intptr_t)arg);
ASSERT_NE(gettid(), getpid()); ASSERT_NE(gettid(), getpid());
ASSERT_EQ(gettid(), *childetid); // CLONE_CHILD_SETTID
return 0; return 0;
} }
TEST(clone, test1) { TEST(clone, test1) {
int ptid = 0; int ptid = 0;
*childetid = -1; struct spawn th;
ASSERT_NE(-1, (tid = clone(CloneTest1, stack, GetStackSize(), ASSERT_SYS(0, 0, _spawn(CloneTest1, (void *)23, &th));
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | ASSERT_SYS(0, 0, _join(&th));
CLONE_SIGHAND | CLONE_PARENT_SETTID |
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID |
CLONE_SETTLS,
(void *)23, &ptid, tls, 64, childetid)));
_wait0(childetid); // CLONE_CHILD_CLEARTID
ASSERT_NE(gettid(), tid); ASSERT_NE(gettid(), tid);
ASSERT_EQ(tid, ptid); ASSERT_EQ(tid, ptid);
ASSERT_EQ(42, x); ASSERT_EQ(42, x);
@ -132,7 +113,7 @@ TEST(clone, test1) {
_Atomic(int) sysbarrier; _Atomic(int) sysbarrier;
int CloneTestSys(void *arg) { int CloneTestSys(void *arg, int tid) {
int i, id = (intptr_t)arg; int i, id = (intptr_t)arg;
CheckStackIsAligned(); CheckStackIsAligned();
while (!sysbarrier) asm("pause"); while (!sysbarrier) asm("pause");
@ -165,25 +146,14 @@ int CloneTestSys(void *arg) {
TEST(clone, tlsSystemCallsErrno_wontClobberMainThreadBecauseTls) { TEST(clone, tlsSystemCallsErrno_wontClobberMainThreadBecauseTls) {
int i; int i;
char *tls[8], *stack[8]; struct spawn th[8];
ASSERT_EQ(0, errno); ASSERT_EQ(0, errno);
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
tls[i] = __initialize_tls(malloc(64)); ASSERT_SYS(0, 0, _spawn(CloneTestSys, (void *)(intptr_t)i, th + i));
stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(
-1,
(tid = clone(
CloneTestSys, stack[i], GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS,
(void *)(intptr_t)i, 0, tls[i], 64, (int *)(tls[i] + 0x38))));
} }
sysbarrier = 1; sysbarrier = 1;
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
_wait0((int *)(tls[i] + 0x38)); ASSERT_SYS(0, 0, _join(th + i));
free(tls[i]);
munmap(stack[i], GetStackSize());
} }
ASSERT_EQ(0, errno); ASSERT_EQ(0, errno);
} }

View file

@ -119,7 +119,7 @@ TEST(mprotect, testSegfault_writeToReadOnlyAnonymous) {
} }
TEST(mprotect, testExecOnly_canExecute) { TEST(mprotect, testExecOnly_canExecute) {
char *p = mapanon(FRAMESIZE); char *p = _mapanon(FRAMESIZE);
void (*f)(void) = (void *)p; void (*f)(void) = (void *)p;
p[0] = 0xC3; // RET p[0] = 0xC3; // RET
ASSERT_SYS(0, 0, mprotect(p, FRAMESIZE, PROT_EXEC | PROT_READ)); ASSERT_SYS(0, 0, mprotect(p, FRAMESIZE, PROT_EXEC | PROT_READ));

View file

@ -37,6 +37,7 @@ TEST_LIBC_RUNTIME_DIRECTDEPS = \
LIBC_STR \ LIBC_STR \
LIBC_STUBS \ LIBC_STUBS \
LIBC_SYSV \ LIBC_SYSV \
LIBC_THREAD \
LIBC_TESTLIB \ LIBC_TESTLIB \
LIBC_TINYMATH \ LIBC_TINYMATH \
LIBC_UNICODE \ LIBC_UNICODE \

View file

@ -28,6 +28,7 @@
#include "libc/sysv/consts/map.h" #include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/prot.h"
#include "libc/testlib/testlib.h" #include "libc/testlib/testlib.h"
#include "libc/thread/spawn.h"
#include "libc/x/x.h" #include "libc/x/x.h"
#define THREADS 32 #define THREADS 32
@ -46,10 +47,9 @@ union Dub {
double x; double x;
}; };
char *stack[THREADS]; struct spawn th[THREADS];
char tls[THREADS][64];
int Worker(void *p) { int Worker(void *p, int tid) {
int i; int i;
char str[64]; char str[64];
for (i = 0; i < 256; ++i) { for (i = 0; i < 256; ++i) {
@ -63,17 +63,10 @@ int Worker(void *p) {
TEST(dtoa, test) { TEST(dtoa, test) {
int i; int i;
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
clone(Worker, _spawn(Worker, 0, th + i);
(stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)),
GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS,
0, 0, __initialize_tls(tls[i]), sizeof(tls[i]),
(int *)(tls[i] + 0x38));
} }
for (i = 0; i < THREADS; ++i) { for (i = 0; i < THREADS; ++i) {
_wait0((int *)(tls[i] + 0x38)); _join(th + i);
} }
} }

View file

@ -37,6 +37,7 @@ TEST_LIBC_STDIO_DIRECTDEPS = \
LIBC_SYSV \ LIBC_SYSV \
LIBC_TINYMATH \ LIBC_TINYMATH \
LIBC_TESTLIB \ LIBC_TESTLIB \
LIBC_THREAD \
LIBC_TIME \ LIBC_TIME \
LIBC_LOG \ LIBC_LOG \
LIBC_UNICODE \ LIBC_UNICODE \

View file

@ -1,100 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h"
#include "libc/testlib/testlib.h"
#include "libc/thread/thread.h"
#include "libc/time/time.h"
// thread-local with a nonzero initializer; TlsWorker expects a new
// thread to observe 31337 here
static _Thread_local int tdata = 31337;
// thread-local without an initializer; TlsWorker expects a new thread
// to observe zero here
static _Thread_local int tbss;
// Thread entry point that returns its argument unchanged, so the
// joining test can compare the exit code with what it passed in.
static void *ReturnArg(void *arg) {
return arg;
}
// Joining the calling thread's own handle is expected to return EDEADLK.
// NOTE(review): the leading 0 looks like the expected errno — confirm
// against the ASSERT_SYS definition.
TEST(cthread_create, testJoinDeadlock) {
ASSERT_SYS(0, EDEADLK, cthread_join(cthread_self(), 0));
}
// Creates a thread whose entry point returns its argument, joins it,
// and checks that the joined exit code is that same argument.
TEST(cthread_create, testCreateReturnJoin) {
if (IsOpenbsd()) return; // TODO(jart): flakes
void *exitcode;
cthread_t thread;
// ReturnArg serves as both the entry point and the argument value
ASSERT_EQ(0, cthread_create(&thread, 0, ReturnArg, ReturnArg));
ASSERT_EQ(0, cthread_join(thread, &exitcode));
ASSERT_EQ(ReturnArg, exitcode);
}
// Thread entry point that ends the thread through cthread_exit(),
// passing its argument as the exit code, rather than by returning.
// NOTE(review): there's no return statement, so this presumably relies
// on cthread_exit() never returning — confirm its declaration.
static void *ExitArg(void *arg) {
cthread_exit(arg);
}
// Same as the return/join test, except the thread terminates by calling
// cthread_exit(); the joined exit code must equal the value passed in.
TEST(cthread_create, testCreateExitJoin) {
if (IsOpenbsd()) return; // TODO(jart): flakes
void *exitcode;
cthread_t thread;
ASSERT_EQ(0, cthread_create(&thread, 0, ExitArg, (void *)-31337));
ASSERT_EQ(0, cthread_join(thread, &exitcode));
ASSERT_EQ((void *)-31337, exitcode);
}
// Guards against size regressions: the TLS image must stay under 8192
// bytes and the executable under a limit that depends on build mode.
// Skipped on XNU, Windows, and OpenBSD.
TEST(gcctls, size) {
if (IsXnu()) return; // TODO(jart): codemorph
if (IsWindows()) return; // TODO(jart): codemorph
if (IsOpenbsd()) return; // TODO(jart): flakes
// schlep in .zip section too
// make sure executable isn't too huge
size_t size;
int64_t x = 0;
// NOTE(review): result is unused; the call presumably exists only to
// pull gmtime's dependencies into the link — confirm
gmtime(&x);
ASSERT_LT((uintptr_t)_tls_size, 8192);
size = GetFileSize(GetProgramExecutableName());
// limits: 200kb for tiny builds, 4mb for debug / asan, 500kb otherwise
if (IsTiny()) {
ASSERT_LT(size, 200 * 1024);
} else if (IsModeDbg() || IsAsan()) {
ASSERT_LT(size, 4 * 1024 * 1024);
} else {
ASSERT_LT(size, 500 * 1024);
}
}
// Thread entry point asserting that this thread sees the initial
// values of the thread-locals (31337 and 0), whatever the spawning
// thread stored into its own copies beforehand. Always returns null.
static void *TlsWorker(void *arg) {
ASSERT_EQ(31337, tdata);
ASSERT_EQ(0, tbss);
return 0;
}
// Checks that _Thread_local variables work on the calling thread and
// that a newly created thread starts from their initial values instead
// of inheriting the values its creator stored. Skipped on XNU, Windows,
// and OpenBSD.
TEST(gcctls, worksAndIsNonInheritable) {
  if (IsXnu()) return;      // TODO(jart): codemorph
  if (IsWindows()) return;  // TODO(jart): codemorph
  if (IsOpenbsd()) return;  // TODO(jart): flakes
  void *exitcode;
  cthread_t thread;
  // expected value goes first, like every other ASSERT_EQ in this file
  ASSERT_EQ(31337, tdata);
  ASSERT_EQ(0, tbss);
  // dirty this thread's copies; TlsWorker asserts it doesn't see them
  tdata = 1337;
  tbss = 1337;
  ASSERT_EQ(0, cthread_create(&thread, 0, TlsWorker, (void *)-31337));
  ASSERT_EQ(0, cthread_join(thread, &exitcode));
  ASSERT_EQ(NULL, exitcode);
}

View file

@ -16,22 +16,15 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/calls/calls.h"
#include "libc/dce.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h"
char tib[64]; _Thread_local char x;
static _Thread_local char y;
TEST(gettid, test) { char ha(void) {
if (IsLinux()) EXPECT_EQ(getpid(), gettid()); ++y;
if (IsNetbsd()) EXPECT_EQ(1, gettid()); return x;
} }
BENCH(gettid, bench) { char ya(void) {
int gettid_(void) asm("gettid"); return y;
EZBENCH2("gettid (single threaded)", donothing, gettid());
__install_tls(__initialize_tls(tib));
EZBENCH2("gettid (tls enabled)", donothing, gettid());
} }

View file

@ -16,63 +16,42 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/calls/struct/sigaction.h" #include "libc/assert.h"
#include "libc/intrin/spinlock.h" #include "libc/bits/atomic.h"
#include "libc/intrin/wait0.internal.h" #include "libc/calls/calls.h"
#include "libc/log/log.h" #include "libc/calls/syscall-sysv.internal.h"
#include "libc/mem/mem.h" #include "libc/intrin/kprintf.h"
#include "libc/nexgen32e/threaded.h" #include "libc/macros.internal.h"
#include "libc/runtime/gc.internal.h" #include "libc/testlib/testlib.h"
#include "libc/runtime/runtime.h" #include "libc/thread/spawn.h"
#include "libc/runtime/stack.h" #include "libc/thread/thread.h"
#include "libc/runtime/sysconf.h"
#include "libc/sysv/consts/clone.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/sig.h"
#include "libc/time/time.h"
volatile bool gotctrlc; #define N 128
void GotCtrlC(int sig) { struct spawn t[N];
gotctrlc = true; _Atomic(int) itworked;
_Thread_local int var;
int Worker(void *arg, int tid) {
int i = (long)arg;
ASSERT_EQ(0, var++);
ASSERT_EQ(gettid(), tid);
ASSERT_EQ(1, var++);
ASSERT_EQ(sys_gettid(), tid);
ASSERT_EQ(2, var++);
itworked++;
return 0;
} }
int Worker(void *arg) { TEST(_spawn, test) {
uint8_t *p; long i;
unsigned x = 0; for (i = 0; i < N; ++i) EXPECT_SYS(0, 0, _spawn(Worker, (void *)i, t + i));
struct sigaction sa = {.sa_handler = GotCtrlC}; for (i = 0; i < N; ++i) EXPECT_SYS(0, 0, _join(t + i));
sigaction(SIGINT, &sa, 0); for (i = 0; i < N; ++i) EXPECT_SYS(0, 0, _join(t + i));
for (;;) { EXPECT_EQ(N, itworked);
for (p = _base; p < _end; ++p) {
x += *p;
if (gotctrlc) {
return x | x >> 8 | x >> 16 | x >> 24;
}
}
}
} }
int main(int argc, char *argv[]) { __attribute__((__constructor__)) static void init(void) {
char **tls; pledge("stdio rpath thread", 0);
int i, n, prot, flags; errno = 0;
ShowCrashReports();
n = GetCpuCount();
tls = gc(malloc(n * sizeof(*tls)));
for (i = 0; i < n; ++i) {
prot = PROT_READ | PROT_WRITE;
flags = MAP_STACK | MAP_ANONYMOUS;
tls[i] = __initialize_tls(malloc(64));
clone(Worker, mmap(0, GetStackSize(), prot, flags, -1, 0), GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS,
0, 0, tls[i], 64, (int *)(tls[i] + 0x38));
}
while (!gotctrlc) {
usleep(1000);
}
for (i = 0; i < n; ++i) {
_wait0((int *)(tls[i] + 0x38));
free(tls[i]);
}
} }

View file

@ -184,7 +184,7 @@ static void *
_PyObject_ArenaMmap(void *ctx, size_t size) _PyObject_ArenaMmap(void *ctx, size_t size)
{ {
#ifdef __COSMOPOLITAN__ #ifdef __COSMOPOLITAN__
return mapanon(size); return _mapanon(size);
#else #else
void *ptr; void *ptr;
ptr = mmap(NULL, size, PROT_READ|PROT_WRITE, ptr = mmap(NULL, size, PROT_READ|PROT_WRITE,

View file

@ -47,6 +47,7 @@ TOOL_BUILD_DIRECTDEPS = \
LIBC_SYSV \ LIBC_SYSV \
LIBC_SYSV_CALLS \ LIBC_SYSV_CALLS \
LIBC_TIME \ LIBC_TIME \
LIBC_THREAD \
LIBC_TINYMATH \ LIBC_TINYMATH \
LIBC_UNICODE \ LIBC_UNICODE \
LIBC_X \ LIBC_X \

View file

@ -52,6 +52,7 @@
#include "libc/sysv/consts/map.h" #include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/o.h" #include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/prot.h"
#include "libc/thread/spawn.h"
#include "libc/time/time.h" #include "libc/time/time.h"
#include "libc/x/x.h" #include "libc/x/x.h"
#include "third_party/getopt/getopt.h" #include "third_party/getopt/getopt.h"
@ -122,11 +123,10 @@ struct Edges {
}; };
char *out; char *out;
char **tls;
int threads; int threads;
char **bouts; char **bouts;
char **stack;
unsigned counter; unsigned counter;
struct spawn *th;
struct GetArgs ga; struct GetArgs ga;
struct Edges edges; struct Edges edges;
struct Sauce *sauces; struct Sauce *sauces;
@ -248,7 +248,7 @@ wontreturn void OnMissingFile(const char *list, const char *src) {
exit(1); exit(1);
} }
int LoadRelationshipsWorker(void *arg) { int LoadRelationshipsWorker(void *arg, int tid) {
int fd; int fd;
ssize_t rc; ssize_t rc;
bool skipme; bool skipme;
@ -307,18 +307,14 @@ void LoadRelationships(int argc, char *argv[]) {
int i; int i;
getargs_init(&ga, argv + optind); getargs_init(&ga, argv + optind);
for (i = 0; i < threads; ++i) { for (i = 0; i < threads; ++i) {
if (clone(LoadRelationshipsWorker, stack[i], GetStackSize(), if (_spawn(LoadRelationshipsWorker, (void *)(intptr_t)i, th + i) == -1) {
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
(void *)(intptr_t)i, 0, __initialize_tls(tls[i]), 64,
(int *)(tls[i] + 0x38)) == -1) {
pthread_mutex_lock(&reportlock); pthread_mutex_lock(&reportlock);
kprintf("error: clone(%d) failed %m\n", i); kprintf("error: clone(%d) failed %m\n", i);
exit(1); exit(1);
} }
} }
for (i = 0; i < threads; ++i) { for (i = 0; i < threads; ++i) {
_wait0((int *)(tls[i] + 0x38)); _join(th + i);
} }
getargs_destroy(&ga); getargs_destroy(&ga);
} }
@ -388,17 +384,17 @@ void Dive(char **bout, uint32_t *visited, unsigned id) {
} }
} }
int Diver(void *arg) { int Diver(void *arg, int tid) {
char *bout = 0; char *bout = 0;
const char *path; const char *path;
uint32_t *visited; uint32_t *visited;
size_t i, visilen; size_t i, visilen;
char pathbuf[PATH_MAX]; char pathbuf[PATH_MAX];
int tid = (intptr_t)arg; int x = (intptr_t)arg;
visilen = (sources.i + sizeof(*visited) * CHAR_BIT - 1) / visilen = (sources.i + sizeof(*visited) * CHAR_BIT - 1) /
(sizeof(*visited) * CHAR_BIT); (sizeof(*visited) * CHAR_BIT);
visited = malloc(visilen * sizeof(*visited)); visited = malloc(visilen * sizeof(*visited));
for (i = tid; i < sources.i; i += threads) { for (i = x; i < sources.i; i += threads) {
path = strings.p + sauces[i].name; path = strings.p + sauces[i].name;
if (!IsObjectSource(path)) continue; if (!IsObjectSource(path)) continue;
appendw(&bout, '\n'); appendw(&bout, '\n');
@ -415,25 +411,21 @@ int Diver(void *arg) {
} }
free(visited); free(visited);
appendw(&bout, '\n'); appendw(&bout, '\n');
bouts[tid] = bout; bouts[x] = bout;
return 0; return 0;
} }
void Explore(void) { void Explore(void) {
int i; int i;
for (i = 0; i < threads; ++i) { for (i = 0; i < threads; ++i) {
if (clone(Diver, stack[i], GetStackSize(), if (_spawn(Diver, (void *)(intptr_t)i, th + i) == -1) {
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
(void *)(intptr_t)i, 0, __initialize_tls(tls[i]), 64,
(int *)(tls[i] + 0x38)) == -1) {
pthread_mutex_lock(&reportlock); pthread_mutex_lock(&reportlock);
kprintf("error: clone(%d) failed %m\n", i); kprintf("error: clone(%d) failed %m\n", i);
exit(1); exit(1);
} }
} }
for (i = 0; i < threads; ++i) { for (i = 0; i < threads; ++i) {
_wait0((int *)(tls[i] + 0x38)); _join(th + i);
} }
} }
@ -443,17 +435,8 @@ int main(int argc, char *argv[]) {
if (argc == 2 && !strcmp(argv[1], "-n")) exit(0); if (argc == 2 && !strcmp(argv[1], "-n")) exit(0);
GetOpts(argc, argv); GetOpts(argc, argv);
threads = GetCpuCount(); threads = GetCpuCount();
tls = calloc(threads, sizeof(*tls)); th = calloc(threads, sizeof(*th));
stack = calloc(threads, sizeof(*stack));
bouts = calloc(threads, sizeof(*bouts)); bouts = calloc(threads, sizeof(*bouts));
for (i = 0; i < threads; ++i) {
if (!(tls[i] = malloc(64)) ||
(stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)) == MAP_FAILED) {
kprintf("error: mmap(%d) failed %m\n", i);
exit(1);
}
}
LoadRelationships(argc, argv); LoadRelationships(argc, argv);
Crunch(); Crunch();
Explore(); Explore();
@ -466,15 +449,12 @@ int main(int argc, char *argv[]) {
CHECK_NE(-1, close(fd)); CHECK_NE(-1, close(fd));
CHECK_NE(-1, rename(path, out)); CHECK_NE(-1, rename(path, out));
for (i = 0; i < threads; ++i) { for (i = 0; i < threads; ++i) {
munmap(stack[i], GetStackSize());
free(bouts[i]); free(bouts[i]);
free(tls[i]);
} }
free(strings.p); free(strings.p);
free(edges.p); free(edges.p);
free(sauces); free(sauces);
free(stack);
free(bouts); free(bouts);
free(tls); free(th);
return 0; return 0;
} }

View file

@ -49,6 +49,7 @@ TOOL_NET_DIRECTDEPS = \
LIBC_SYSV \ LIBC_SYSV \
LIBC_SYSV_CALLS \ LIBC_SYSV_CALLS \
LIBC_TIME \ LIBC_TIME \
LIBC_THREAD \
LIBC_TINYMATH \ LIBC_TINYMATH \
LIBC_UNICODE \ LIBC_UNICODE \
LIBC_X \ LIBC_X \

View file

@ -36,7 +36,6 @@
#include "libc/intrin/kprintf.h" #include "libc/intrin/kprintf.h"
#include "libc/intrin/nomultics.internal.h" #include "libc/intrin/nomultics.internal.h"
#include "libc/intrin/spinlock.h" #include "libc/intrin/spinlock.h"
#include "libc/intrin/wait0.internal.h"
#include "libc/log/check.h" #include "libc/log/check.h"
#include "libc/log/log.h" #include "libc/log/log.h"
#include "libc/macros.internal.h" #include "libc/macros.internal.h"
@ -86,6 +85,7 @@
#include "libc/sysv/consts/termios.h" #include "libc/sysv/consts/termios.h"
#include "libc/sysv/consts/w.h" #include "libc/sysv/consts/w.h"
#include "libc/sysv/errfuns.h" #include "libc/sysv/errfuns.h"
#include "libc/thread/spawn.h"
#include "libc/x/x.h" #include "libc/x/x.h"
#include "libc/zip.h" #include "libc/zip.h"
#include "net/http/escape.h" #include "net/http/escape.h"
@ -421,7 +421,6 @@ static lua_State *GL;
static lua_State *YL; static lua_State *YL;
static char *content; static char *content;
static uint8_t *zmap; static uint8_t *zmap;
static char *repltls;
static uint8_t *zbase; static uint8_t *zbase;
static uint8_t *zcdir; static uint8_t *zcdir;
static size_t hdrsize; static size_t hdrsize;
@ -431,7 +430,6 @@ static char *replstack;
static reader_f reader; static reader_f reader;
static writer_f writer; static writer_f writer;
static char *extrahdrs; static char *extrahdrs;
static char *monitortls;
static char *luaheaderp; static char *luaheaderp;
static const char *zpath; static const char *zpath;
static const char *brand; static const char *brand;
@ -454,6 +452,8 @@ static const char *launchbrowser;
static const char *referrerpolicy; static const char *referrerpolicy;
static ssize_t (*generator)(struct iovec[3]); static ssize_t (*generator)(struct iovec[3]);
static struct spawn replth;
static struct spawn monitorth;
static struct Buffer inbuf_actual; static struct Buffer inbuf_actual;
static struct Buffer inbuf; static struct Buffer inbuf;
static struct Buffer oldin; static struct Buffer oldin;
@ -6461,7 +6461,7 @@ static int ExitWorker(void) {
} }
if (monitortty) { if (monitortty) {
terminatemonitor = true; terminatemonitor = true;
_wait0((int *)(monitortls + 0x38)); _join(&monitorth);
} }
_Exit(0); _Exit(0);
} }
@ -6482,7 +6482,7 @@ static int EnableSandbox(void) {
} }
} }
static int MemoryMonitor(void *arg) { static int MemoryMonitor(void *arg, int tid) {
static struct termios oldterm; static struct termios oldterm;
static int tty; static int tty;
sigset_t ss; sigset_t ss;
@ -6637,23 +6637,9 @@ static int MemoryMonitor(void *arg) {
} }
static void MonitorMemory(void) { static void MonitorMemory(void) {
if ((monitortls = malloc(64))) { if (_spawn(MemoryMonitor, 0, &monitorth) == -1) {
if ((monitorstack = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)) != MAP_FAILED) {
if (clone(MemoryMonitor, monitorstack, GetStackSize(),
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND | CLONE_SETTLS | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID,
0, 0, __initialize_tls(monitortls), 64,
(int *)(monitortls + 0x38)) != -1) {
return;
}
munmap(monitorstack, GetStackSize());
}
free(monitortls);
}
WARNF("(memv) failed to start memory monitor %m"); WARNF("(memv) failed to start memory monitor %m");
monitortty = 0; }
} }
static int HandleConnection(size_t i) { static int HandleConnection(size_t i) {
@ -7029,7 +7015,7 @@ static void ReplEventLoop(void) {
polls[0].fd = -1; polls[0].fd = -1;
} }
static int WindowsReplThread(void *arg) { static int WindowsReplThread(void *arg, int tid) {
int sig; int sig;
lua_State *L = GL; lua_State *L = GL;
DEBUGF("(repl) started windows thread"); DEBUGF("(repl) started windows thread");
@ -7289,16 +7275,7 @@ void RedBean(int argc, char *argv[]) {
if (daemonize || uniprocess || !linenoiseIsTerminal()) { if (daemonize || uniprocess || !linenoiseIsTerminal()) {
EventLoop(HEARTBEAT); EventLoop(HEARTBEAT);
} else if (IsWindows()) { } else if (IsWindows()) {
CHECK_NE(MAP_FAILED, (repltls = malloc(64))); CHECK_NE(-1, _spawn(WindowsReplThread, 0, &replth));
CHECK_NE(MAP_FAILED,
(replstack = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)));
CHECK_NE(
-1,
clone(WindowsReplThread, replstack, GetStackSize(),
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
0, 0, __initialize_tls(repltls), 64, (int *)(repltls + 0x38)));
EventLoop(100); EventLoop(100);
} else { } else {
ReplEventLoop(); ReplEventLoop();
@ -7315,19 +7292,11 @@ void RedBean(int argc, char *argv[]) {
} }
if (!isexitingworker) { if (!isexitingworker) {
if (!IsTiny()) { if (!IsTiny()) {
if (monitortty) {
terminatemonitor = true; terminatemonitor = true;
_wait0((int *)(monitortls + 0x38)); _join(&monitorth);
munmap(monitorstack, GetStackSize());
free(monitortls);
}
} }
#ifndef STATIC #ifndef STATIC
if (repltls) { _join(&replth);
_wait0((int *)(repltls + 0x38));
munmap(replstack, GetStackSize());
free(repltls);
}
#endif #endif
} }
if (!isexitingworker) { if (!isexitingworker) {
@ -7349,12 +7318,10 @@ int main(int argc, char *argv[]) {
return 0; return 0;
CloseServerFds(); CloseServerFds();
} }
if (repltls) { _join(&replth);
free(repltls);
linenoiseDisableRawMode(); linenoiseDisableRawMode();
linenoiseHistoryFree(); linenoiseHistoryFree();
} }
}
CheckForMemoryLeaks(); CheckForMemoryLeaks();
} }

View file

@ -434,7 +434,7 @@ static void PrintImage(unsigned yn, unsigned xn,
char *v, *vt; char *v, *vt;
size = yn * (xn * (32 + (2 + (1 + 3) * 3) * 2 + 1 + 3)) * 1 + 5 + 1; size = yn * (xn * (32 + (2 + (1 + 3) * 3) * 2 + 1 + 3)) * 1 + 5 + 1;
size = ROUNDUP(size, FRAMESIZE); size = ROUNDUP(size, FRAMESIZE);
CHECK_NE(MAP_FAILED, (vt = mapanon(size))); CHECK_NOTNULL((vt = _mapanon(size)));
v = RenderImage(vt, yn, xn, rgb); v = RenderImage(vt, yn, xn, rgb);
*v++ = '\r'; *v++ = '\r';
*v++ = 033; *v++ = 033;
@ -532,8 +532,8 @@ static void LoadFile(const char *path, size_t yn, size_t xn, void *rgb) {
CHECK_EQ(CN, 3); CHECK_EQ(CN, 3);
data2size = ROUNDUP(sizeof(float) * goty * gotx * CN, FRAMESIZE); data2size = ROUNDUP(sizeof(float) * goty * gotx * CN, FRAMESIZE);
data3size = ROUNDUP(sizeof(float) * yn * YS * xn * XS * CN, FRAMESIZE); data3size = ROUNDUP(sizeof(float) * yn * YS * xn * XS * CN, FRAMESIZE);
CHECK_NE(MAP_FAILED, (data2 = mapanon(data2size))); CHECK_NOTNULL((data2 = _mapanon(data2size)));
CHECK_NE(MAP_FAILED, (data3 = mapanon(data3size))); CHECK_NOTNULL((data3 = _mapanon(data3size)));
rgb2lin(goty * gotx * CN, data2, data); rgb2lin(goty * gotx * CN, data2, data);
lanczos3(yn * YS, xn * XS, data3, goty, gotx, data2, gotx * 3); lanczos3(yn * YS, xn * XS, data3, goty, gotx, data2, gotx * 3);
rgb2std(yn * YS * xn * XS * CN, rgb, data3); rgb2std(yn * YS * xn * XS * CN, rgb, data3);
@ -603,7 +603,7 @@ int main(int argc, char *argv[]) {
// FIXME: on the conversion stage should do 2Y because of halfblocks // FIXME: on the conversion stage should do 2Y because of halfblocks
// printf( "filename >%s<\tx >%d<\ty >%d<\n\n", filename, x_, y_); // printf( "filename >%s<\tx >%d<\ty >%d<\n\n", filename, x_, y_);
size = y_ * YS * x_ * XS * CN; size = y_ * YS * x_ * XS * CN;
CHECK_NE(MAP_FAILED, (rgb = mapanon(ROUNDUP(size, FRAMESIZE)))); CHECK_NOTNULL((rgb = _mapanon(ROUNDUP(size, FRAMESIZE))));
for (i = optind; i < argc; ++i) { for (i = optind; i < argc; ++i) {
if (!argv[i]) continue; if (!argv[i]) continue;
if (m_) { if (m_) {

View file

@ -37,7 +37,7 @@ forceinline void ConvolveGradient(unsigned yn, unsigned xn,
size_t size; size_t size;
unsigned y, x, i, j, k; unsigned y, x, i, j, k;
float py[4], px[4], (*tmp)[yn][xn][4]; float py[4], px[4], (*tmp)[yn][xn][4];
tmp = mapanon((size = ROUNDUP(sizeof(float) * 4 * xn * yn, FRAMESIZE))); tmp = _mapanon((size = ROUNDUP(sizeof(float) * 4 * xn * yn, FRAMESIZE)));
for (y = 0; y < yn - KW + 1; ++y) { for (y = 0; y < yn - KW + 1; ++y) {
for (x = 0; x < xn - KW + 1; ++x) { for (x = 0; x < xn - KW + 1; ++x) {
for (k = 0; k < 4; ++k) py[k] = 0; for (k = 0; k < 4; ++k) py[k] = 0;

View file

@ -751,7 +751,7 @@ static void RasterIt(void) {
static bool once; static bool once;
static void *buf; static void *buf;
if (!once) { if (!once) {
buf = mapanon(ROUNDUP(fb0_.size, FRAMESIZE)); buf = _mapanon(ROUNDUP(fb0_.size, FRAMESIZE));
once = true; once = true;
} }
WriteToFrameBuffer(fb0_.vscreen.yres_virtual, fb0_.vscreen.xres_virtual, buf, WriteToFrameBuffer(fb0_.vscreen.yres_virtual, fb0_.vscreen.xres_virtual, buf,