From 5f4f6b0e6942b6813ecddaf5e45376b3b91766e1 Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Sun, 10 Jul 2022 04:01:17 -0700 Subject: [PATCH] Make _Thread_local work across platforms We now rewrite the binary image at runtime on Windows and XNU to change mov %fs:0,%reg instructions to use %gs instead. This change also introduces a simpler threading API, _spawn() and _join(), which has replaced most clone() usage. --- build/definitions.mk | 3 +- examples/greenbean.c | 61 ++--- examples/thread.c | 70 ------ libc/calls/calls.h | 4 +- libc/intrin/futex_wait.c | 1 + libc/intrin/intrin.mk | 1 - libc/intrin/tls.greg.c | 125 ---------- libc/intrin/wait0.c | 8 +- libc/log/oncrash.c | 5 +- libc/runtime/clone-linux.S | 7 +- libc/runtime/clone.c | 225 ++++++++++++------ libc/runtime/getsymboltable.c | 2 +- libc/runtime/mapanon.c | 5 +- .../tls_test.c => libc/runtime/mapstack.c | 39 +-- libc/runtime/runtime.h | 6 +- libc/runtime/threadmode.c | 193 ++++++++++++++- libc/runtime/winthreadlaunch.S | 9 +- libc/str/lz4decode.c | 2 +- libc/sysv/gettls.greg.c | 1 + libc/sysv/sysv.mk | 1 + libc/sysv/tlspolyfill.S | 90 +++++++ libc/thread/create.c | 133 ----------- libc/thread/ctor.S | 2 +- libc/thread/join.c | 72 ------ libc/thread/{init.c => mktls.c} | 68 +++--- libc/thread/spawn.c | 113 +++++++++ libc/thread/spawn.h | 20 ++ libc/thread/thread.h | 1 - test/libc/calls/reservefd_test.c | 18 +- test/libc/calls/test.mk | 1 + test/libc/intrin/kprintf_test.c | 5 +- test/libc/intrin/pthread_mutex_lock_test.c | 96 +------- test/libc/intrin/test.mk | 1 + test/libc/nexgen32e/lz4decode_test.c | 2 +- test/libc/rand/rand64_test.c | 23 +- test/libc/runtime/clone_test.c | 76 ++---- test/libc/runtime/mprotect_test.c | 2 +- test/libc/runtime/test.mk | 1 + test/libc/stdio/dtoa_test.c | 17 +- test/libc/stdio/test.mk | 1 + test/libc/thread/create_test.c | 100 -------- .../{intrin/gettid_test.c => thread/dog.c} | 21 +- .../libc/thread/spawn_test.c | 85 +++---- 
third_party/python/Objects/obmalloc.c | 2 +- tool/build/build.mk | 1 + tool/build/mkdeps.c | 46 +--- tool/net/net.mk | 1 + tool/net/redbean.c | 63 ++--- tool/viz/derasterize.c | 18 +- tool/viz/lib/sobel.c | 2 +- tool/viz/printvideo.c | 2 +- 51 files changed, 808 insertions(+), 1043 deletions(-) delete mode 100644 examples/thread.c delete mode 100644 libc/intrin/tls.greg.c rename test/libc/intrin/tls_test.c => libc/runtime/mapstack.c (76%) create mode 100644 libc/sysv/tlspolyfill.S delete mode 100644 libc/thread/create.c delete mode 100644 libc/thread/join.c rename libc/thread/{init.c => mktls.c} (56%) create mode 100644 libc/thread/spawn.c create mode 100644 libc/thread/spawn.h delete mode 100644 test/libc/thread/create_test.c rename test/libc/{intrin/gettid_test.c => thread/dog.c} (78%) rename tool/build/wastecpu.c => test/libc/thread/spawn_test.c (53%) diff --git a/build/definitions.mk b/build/definitions.mk index 73f35cbc1..cf10b15ba 100644 --- a/build/definitions.mk +++ b/build/definitions.mk @@ -153,7 +153,8 @@ DEFAULT_COPTS = \ -fno-gnu-unique \ -fstrict-aliasing \ -fstrict-overflow \ - -fno-semantic-interposition + -fno-semantic-interposition \ + -mno-tls-direct-seg-refs MATHEMATICAL = \ -O3 \ diff --git a/examples/greenbean.c b/examples/greenbean.c index c5a0ff59f..23853e2ca 100644 --- a/examples/greenbean.c +++ b/examples/greenbean.c @@ -43,6 +43,7 @@ #include "libc/sysv/consts/sock.h" #include "libc/sysv/consts/sol.h" #include "libc/sysv/consts/tcp.h" +#include "libc/thread/spawn.h" #include "libc/time/struct/tm.h" #include "libc/time/time.h" #include "net/http/http.h" @@ -106,7 +107,7 @@ _Atomic(int) connections; _Atomic(int) closingtime; const char *volatile status; -int Worker(void *id) { +int Worker(void *id, int tid) { int server, yes = 1; // load balance incoming connections for port 8080 across all threads @@ -273,8 +274,7 @@ void PrintStatus(void) { int main(int argc, char *argv[]) { int i; - char **tls; - char **stack; + struct spawn *th; 
uint32_t *hostips; // ShowCrashReports(); @@ -293,36 +293,23 @@ int main(int argc, char *argv[]) { PORT); } - // spawn over 9,000 worker threads - tls = 0; - stack = 0; threads = argc > 1 ? atoi(argv[1]) : GetCpuCount(); - if ((1 <= threads && threads <= INT_MAX) && - (tls = malloc(threads * sizeof(*tls))) && - (stack = malloc(threads * sizeof(*stack)))) { - for (i = 0; i < threads; ++i) { - if ((tls[i] = __initialize_tls(malloc(64))) && - (stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0)) != MAP_FAILED) { - ++workers; - if (clone(Worker, stack[i], GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_SETTLS | CLONE_CHILD_SETTID | - CLONE_CHILD_CLEARTID, - (void *)(intptr_t)i, 0, tls[i], 64, - (int *)(tls[i] + 0x38)) == -1) { - --workers; - kprintf("error: clone(%d) failed %m\n", i); - } - } else { - kprintf("error: mmap(%d) failed %m\n", i); - } - if (!(i % 500)) { - PrintStatus(); - } - } - } else { + if (!(1 <= threads && threads <= 100000)) { kprintf("error: invalid number of threads\n"); + exit(1); + } + + // spawn over 9,000 worker threads + th = calloc(threads, sizeof(*th)); + for (i = 0; i < threads; ++i) { + ++workers; + if (_spawn(Worker, (void *)(intptr_t)i, th + i) == -1) { + --workers; + kprintf("error: _spawn(%d) failed %m\n", i); + } + if (!(i % 500)) { + PrintStatus(); + } } // wait for workers to terminate @@ -335,17 +322,11 @@ int main(int argc, char *argv[]) { kprintf("\r\e[K"); // join the workers - // this is how we guarantee stacks are safe to free - if (tls && stack) { - for (i = 0; i < threads; ++i) { - _wait0((int *)(tls[i] + 0x38)); - munmap(stack[i], GetStackSize()); - free(tls[i]); - } + for (i = 0; i < threads; ++i) { + _join(th + i); } // clean up memory free(hostips); - free(stack); - free(tls); + free(th); } diff --git a/examples/thread.c b/examples/thread.c deleted file mode 100644 index ec253088a..000000000 --- a/examples/thread.c +++ /dev/null @@ 
-1,70 +0,0 @@ -#if 0 -/*─────────────────────────────────────────────────────────────────╗ -│ To the extent possible under law, Justine Tunney has waived │ -│ all copyright and related or neighboring rights to this file, │ -│ as it is written in the following disclaimers: │ -│ • http://unlicense.org/ │ -│ • http://creativecommons.org/publicdomain/zero/1.0/ │ -╚─────────────────────────────────────────────────────────────────*/ -#endif -#include "libc/calls/calls.h" -#include "libc/dce.h" -#include "libc/intrin/kprintf.h" -#include "libc/log/log.h" -#include "libc/runtime/runtime.h" -#include "libc/stdio/stdio.h" -#include "libc/thread/thread.h" -#include "libc/time/time.h" - -cthread_sem_t semaphore; -_Thread_local int test_tls = 0x12345678; - -static void *worker(void *arg) { - int tid; - cthread_t self; - cthread_sem_signal(&semaphore); - self = cthread_self(); - tid = self->tid; - printf("[%p] %d -> %#x\n", self, tid, test_tls); - if (test_tls != 0x12345678) { - printf(".tdata test #2 failed\n"); - } - return (void *)4; -} - -int main() { - int rc, tid; - void *exitcode; - cthread_t self, thread; - - if (IsWindows() || IsXnu()) { - fprintf(stderr, - "error: can't run example\n" - "_Thread_local only works on Linux/FreeBSD/NetBSD/OpenBSD\n"); - return 1; - } - - self = cthread_self(); - tid = self->tid; - printf("[%p] %d -> %#x\n", self, tid, test_tls); - if (test_tls != 0x12345678) { - printf(".tdata test #1 failed\n"); - } - cthread_sem_init(&semaphore, 0); - rc = cthread_create(&thread, NULL, &worker, NULL); - if (rc == 0) { - cthread_sem_wait(&semaphore, 0, NULL); - printf("thread created: %p\n", thread); -#if 1 - cthread_join(thread, &exitcode); -#else - exitcode = cthread_detach(thread); -#endif - cthread_sem_signal(&semaphore); - cthread_sem_wait(&semaphore, 0, NULL); - printf("thread joined: %p -> %p\n", thread, exitcode); - } else { - fprintf(stderr, "ERROR: thread could not be started: %d\n", rc); - } - return 0; -} diff --git a/libc/calls/calls.h 
b/libc/calls/calls.h index 5775c0ecc..8a38d0f00 100644 --- a/libc/calls/calls.h +++ b/libc/calls/calls.h @@ -67,6 +67,7 @@ int chdir(const char *); int chmod(const char *, uint32_t); int chown(const char *, uint32_t, uint32_t); int chroot(const char *); +int clone(void *, void *, size_t, int, void *, int *, void *, size_t, int *); int close(int); int creat(const char *, uint32_t); int dup(int); @@ -196,9 +197,6 @@ ssize_t splice(int, int64_t *, int, int64_t *, size_t, uint32_t); ssize_t write(int, const void *, size_t); void sync(void); -int clone(int (*)(void *), void *, size_t, int, void *, int *, void *, size_t, - int *); - COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ #endif /* COSMOPOLITAN_LIBC_CALLS_SYSCALLS_H_ */ diff --git a/libc/intrin/futex_wait.c b/libc/intrin/futex_wait.c index 96cabc701..5cf41574d 100644 --- a/libc/intrin/futex_wait.c +++ b/libc/intrin/futex_wait.c @@ -18,6 +18,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/strace.internal.h" #include "libc/calls/struct/timespec.h" +#include "libc/errno.h" #include "libc/fmt/itoa.h" #include "libc/intrin/describeflags.internal.h" #include "libc/intrin/futex.internal.h" diff --git a/libc/intrin/intrin.mk b/libc/intrin/intrin.mk index 62f3ed6be..1f8fa41b6 100644 --- a/libc/intrin/intrin.mk +++ b/libc/intrin/intrin.mk @@ -108,7 +108,6 @@ o/$(MODE)/libc/intrin/describeprotflags.o: \ OVERRIDE_CFLAGS += \ -fno-sanitize=address -o/$(MODE)/libc/intrin/tls.greg.o \ o/$(MODE)/libc/intrin/exit.greg.o \ o/$(MODE)/libc/intrin/exit1.greg.o \ o/$(MODE)/libc/intrin/getenv.greg.o \ diff --git a/libc/intrin/tls.greg.c b/libc/intrin/tls.greg.c deleted file mode 100644 index 1df8109d2..000000000 --- a/libc/intrin/tls.greg.c +++ /dev/null @@ -1,125 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ 
-╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/assert.h" -#include "libc/calls/calls.h" -#include "libc/dce.h" -#include "libc/errno.h" -#include "libc/nexgen32e/threaded.h" -#include "libc/nt/thread.h" -#include "libc/nt/thunk/msabi.h" -#include "libc/sysv/consts/nrlinux.h" - -#define __NR_sysarch 0x000000a5 // freebsd+netbsd -#define AMD64_SET_GSBASE 131 // freebsd -#define AMD64_SET_FSBASE 129 // freebsd -#define X86_SET_GSBASE 16 // netbsd -#define X86_SET_FSBASE 17 // netbsd - -#define __NR___set_tcb 0x00000149 -#define __NR__lwp_setprivate 0x0000013d -#define __NR_thread_fast_set_cthread_self 0x03000003 - -/** - * Initializes thread information block. 
- * - * Here's the layout your c library assumes: - * - * offset size description - * 0x0000 0x08 linear address pointer - * 0x0030 0x08 linear address pointer - * 0x0038 0x04 tid - * 0x003c 0x04 errno - * - */ -privileged void *__initialize_tls(char tib[64]) { - if (tib) { - *(intptr_t *)(tib + 0x00) = (intptr_t)tib; - *(intptr_t *)(tib + 0x30) = (intptr_t)tib; - *(int *)(tib + 0x38) = -1; // tid - *(int *)(tib + 0x3c) = 0; - } - return tib; -} - -/** - * Installs thread information block on main process. - * - * For example, to set up TLS correctly for the main thread, without - * creating any threads, then it's sufficient to say: - * - * __attribute__((__constructor__)) static void InitTls(void) { - * static char tls[64]; - * __initialize_tls(tls); - * *(int *)(tls + 0x38) = gettid(); - * *(int *)(tls + 0x3c) = __errno; - * __install_tls(tls); - * } - * - * We use a constructor here to make sure it only happens once. Please - * note that calling `clone` will do this automatically. - * - * Installing TLS causes the `__tls_enabled` variable to be set. This - * causes C library features such as `errno` and `gettid()` to use TLS. - * This can help things like recursive mutexes go significantly faster. - * - * To access your TLS storage, you can call `__get_tls()` or - * __get_tls_inline()` which return the address of the `tib`. - * - * @param tib is your thread information block, which must have at least - * 64 bytes on the righthand side of the tib pointer since those are - * the values your C library reserves for itself. memory on the left - * side of the pointer is reserved by the linker for _Thread_local. 
- */ -privileged void __install_tls(char tib[64]) { - int ax, dx; - assert(tib); - assert(!__tls_enabled); - assert(*(int *)(tib + 0x38) != -1); - if (IsWindows()) { - __tls_index = TlsAlloc(); - asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib)); - } else if (IsFreebsd()) { - asm volatile("syscall" - : "=a"(ax) - : "0"(__NR_sysarch), "D"(AMD64_SET_FSBASE), "S"(tib) - : "rcx", "r11", "memory", "cc"); - } else if (IsNetbsd()) { - asm volatile("syscall" - : "=a"(ax), "=d"(dx) - : "0"(__NR_sysarch), "D"(X86_SET_FSBASE), "S"(tib) - : "rcx", "r11", "memory", "cc"); - } else if (IsXnu()) { - asm volatile("syscall" - : "=a"(ax) - : "0"(__NR_thread_fast_set_cthread_self), - "D"((intptr_t)tib - 0x30) - : "rcx", "r11", "memory", "cc"); - } else if (IsOpenbsd()) { - asm volatile("syscall" - : "=a"(ax) - : "0"(__NR___set_tcb), "D"(tib) - : "rcx", "r11", "memory", "cc"); - } else { - asm volatile("syscall" - : "=a"(ax) - : "0"(__NR_linux_arch_prctl), "D"(ARCH_SET_FS), "S"(tib) - : "rcx", "r11", "memory"); - } - __tls_enabled = true; -} diff --git a/libc/intrin/wait0.c b/libc/intrin/wait0.c index f48d4d391..ce7e0be21 100644 --- a/libc/intrin/wait0.c +++ b/libc/intrin/wait0.c @@ -16,10 +16,10 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/assert.h" #include "libc/bits/atomic.h" #include "libc/calls/calls.h" #include "libc/dce.h" +#include "libc/errno.h" #include "libc/intrin/futex.internal.h" #include "libc/intrin/wait0.internal.h" #include "libc/linux/futex.h" @@ -31,13 +31,13 @@ * by the clone() system call when a thread terminates. The purpose of * this operation is to know when it's safe to munmap() a thread stack. 
*/ -void _wait0(const int *ptid) { +void _wait0(const int *ctid) { int x; for (;;) { - if (!(x = atomic_load_explicit(ptid, memory_order_acquire))) { + if (!(x = atomic_load_explicit(ctid, memory_order_acquire))) { break; } else if (IsLinux() /* || IsOpenbsd() */) { - _futex_wait(ptid, x, &(struct timespec){2}); + _futex_wait(ctid, x, &(struct timespec){2}); } else { sched_yield(); } diff --git a/libc/log/oncrash.c b/libc/log/oncrash.c index bb5e6f278..311c32036 100644 --- a/libc/log/oncrash.c +++ b/libc/log/oncrash.c @@ -245,9 +245,12 @@ static wontreturn relegated noinstrument void __minicrash(int sig, "RIP %x\n" "RSP %x\n" "RBP %x\n" + "PID %d\n" + "TID %d\n" "\n", kind, sig, __argv[0], ctx ? ctx->uc_mcontext.rip : 0, - ctx ? ctx->uc_mcontext.rsp : 0, ctx ? ctx->uc_mcontext.rbp : 0); + ctx ? ctx->uc_mcontext.rsp : 0, ctx ? ctx->uc_mcontext.rbp : 0, __pid, + sys_gettid()); __restorewintty(); _Exit(119); } diff --git a/libc/runtime/clone-linux.S b/libc/runtime/clone-linux.S index 1d6d9c85a..bdc5fd028 100644 --- a/libc/runtime/clone-linux.S +++ b/libc/runtime/clone-linux.S @@ -26,7 +26,7 @@ // @param rdx is ptid // @param rcx is ctid // @param r8 is tls -// @param r9 is func +// @param r9 is func(void*,int)→int // @param 8(rsp) is arg // @return tid of child on success, or -1 w/ errno sys_clone_linux: @@ -48,8 +48,9 @@ sys_clone_linux: jmp 0b 2: xor %ebp,%ebp # child thread mov %rbx,%rdi # arg - call *%r9 # func(arg) - xchg %eax,%edi # func(arg) → exitcode + mov (%r10),%esi # tid + call *%r9 # func(arg,tid) + xchg %eax,%edi # func(arg,tid) → exitcode mov $60,%eax # __NR_exit(exitcode) syscall .endfn sys_clone_linux,globl,hidden diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c index 41bce3b92..4be5df7d0 100644 --- a/libc/runtime/clone.c +++ b/libc/runtime/clone.c @@ -16,7 +16,6 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/assert.h" #include "libc/calls/calls.h" #include "libc/calls/strace.internal.h" #include "libc/calls/struct/ucontext-netbsd.internal.h" @@ -24,7 +23,6 @@ #include "libc/dce.h" #include "libc/errno.h" #include "libc/intrin/asan.internal.h" -#include "libc/intrin/kprintf.h" #include "libc/intrin/spinlock.h" #include "libc/limits.h" #include "libc/macros.internal.h" @@ -68,19 +66,23 @@ struct CloneArgs { }; union { char lock; - void *pstack; + void *oldrsp; }; + int *ptid; int *ctid; int *ztid; char *tls; - int (*func)(void *); + int (*func)(void *, int); void *arg; }; //////////////////////////////////////////////////////////////////////////////// // THE NEW TECHNOLOGY -int WinThreadLaunch(void *arg, int (*func)(void *), intptr_t rsp); +int WinThreadLaunch(void *arg, // rdi + int tid, // rsi + int (*func)(void *, int), // rdx + intptr_t rsp); // rcx // we can't log this function because: // 1. windows owns the backtrace pointer right now @@ -90,16 +92,20 @@ int WinThreadLaunch(void *arg, int (*func)(void *), intptr_t rsp); // 2. windows owns the stack memory right now // we need win32 raw imports because: // 1. generated thunks are function logged -noasan noinstrument static textwindows wontreturn void WinThreadEntry( - int rdi, int rsi, int rdx, struct CloneArgs *wt) { +noasan noinstrument static textwindows wontreturn void // +WinThreadEntry(int rdi, // rcx + int rsi, // rdx + int rdx, // r8 + struct CloneArgs *wt) { // r9 int rc; if (wt->tls) { asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(wt->tls)); } + *wt->ptid = wt->tid; *wt->ctid = wt->tid; - rc = WinThreadLaunch(wt->arg, wt->func, (intptr_t)wt & -16); + rc = WinThreadLaunch(wt->arg, wt->tid, wt->func, (intptr_t)wt & -16); // we can now clear ctid directly since we're no longer using our own // stack memory, which can now be safely free'd by the parent thread. 
*wt->ztid = 0; @@ -109,14 +115,16 @@ noasan noinstrument static textwindows wontreturn void WinThreadEntry( unreachable; } -static textwindows int CloneWindows(int (*func)(void *), char *stk, +static textwindows int CloneWindows(int (*func)(void *, int), char *stk, size_t stksz, int flags, void *arg, - void *tls, size_t tlssz, int *ctid) { + void *tls, size_t tlssz, int *ptid, + int *ctid) { int64_t h; struct CloneArgs *wt; wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) - sizeof(struct CloneArgs)) & -alignof(struct CloneArgs)); + wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->func = func; @@ -133,8 +141,12 @@ static textwindows int CloneWindows(int (*func)(void *), char *stk, //////////////////////////////////////////////////////////////////////////////// // XNU'S NOT UNIX -void XnuThreadThunk(void *pthread, int machport, void *(*func)(void *), - void *arg, intptr_t *stack, unsigned xnuflags); +void XnuThreadThunk(void *pthread, // rdi + int machport, // rsi + void *(*func)(void *), // rdx + void *arg, // rcx + intptr_t *stack, // r8 + unsigned xnuflags); // r9 asm("XnuThreadThunk:\n\t" "xor\t%ebp,%ebp\n\t" "mov\t%r8,%rsp\n\t" @@ -145,11 +157,18 @@ asm("XnuThreadThunk:\n\t" __attribute__((__used__, __no_reorder__)) static wontreturn void -XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg, - struct CloneArgs *wt, unsigned xnuflags) { +XnuThreadMain(void *pthread, // rdi + int tid, // rsi + int (*func)(void *arg, int tid), // rdx + void *arg, // rcx + struct CloneArgs *wt, // r8 + unsigned xnuflags) { // r9 int ax; wt->tid = tid; + *wt->ptid = tid; + *wt->ctid = tid; _spunlock(&wt->lock); + if (wt->tls) { // XNU uses the same 0x30 offset as the WIN32 TIB x64. 
They told the // Go team at Google that they Apply stands by our ability to use it @@ -159,10 +178,9 @@ XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg, : "0"(__NR_thread_fast_set_cthread_self), "D"(wt->tls - 0x30) : "rcx", "r11", "memory", "cc"); } - if (wt->ctid) { - *wt->ctid = tid; - } - func(arg); + + func(arg, tid); + // we no longer use the stack after this point // %rax = int bsdthread_terminate(%rdi = void *stackaddr, // %rsi = size_t freesize, @@ -179,7 +197,7 @@ XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg, } static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags, - void *arg, void *tls, size_t tlssz, int *ctid) { + void *arg, void *tls, size_t tlssz, int *ptid, int *ctid) { int rc; bool failed; static bool once; @@ -198,6 +216,7 @@ static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags, wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) - sizeof(struct CloneArgs)) & -alignof(struct CloneArgs)); + wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->tls = flags & CLONE_SETTLS ? 
tls : 0; @@ -215,8 +234,9 @@ static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags, static wontreturn void FreebsdThreadMain(void *p) { struct CloneArgs *wt = p; + *wt->ptid = wt->tid; *wt->ctid = wt->tid; - wt->func(wt->arg); + wt->func(wt->arg, wt->tid); // we no longer use the stack after this point // void thr_exit(%rdi = long *state); asm volatile("movl\t$0,%0\n\t" // *wt->ztid = 0 @@ -227,8 +247,9 @@ static wontreturn void FreebsdThreadMain(void *p) { unreachable; } -static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags, - void *arg, void *tls, size_t tlssz, int *ctid) { +static int CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz, + int flags, void *arg, void *tls, size_t tlssz, + int *ptid, int *ctid) { int ax; bool failed; int64_t tid; @@ -236,6 +257,7 @@ static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags, wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) - sizeof(struct CloneArgs)) & -alignof(struct CloneArgs)); + wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->tls = tls; @@ -267,7 +289,9 @@ static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags, static wontreturn void OpenbsdThreadMain(void *p) { struct CloneArgs *wt = p; - wt->func(wt->arg); + *wt->ptid = wt->tid; + *wt->ctid = wt->tid; + wt->func(wt->arg, wt->tid); // we no longer use the stack after this point. however openbsd // validates the rsp register too so a race condition can still // happen if the parent tries to free the stack. 
we'll solve it @@ -279,13 +303,14 @@ static wontreturn void OpenbsdThreadMain(void *p) { "movl\t$0,(%%rdi)\n\t" // *wt->ztid = 0 "syscall" // __threxit() : "=m"(*wt->ztid) - : "a"(302), "m"(wt->pstack), "D"(wt->ztid) + : "a"(302), "m"(wt->oldrsp), "D"(wt->ztid) : "rcx", "r11", "memory"); unreachable; } -static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags, - void *arg, void *tls, size_t tlssz, int *ctid) { +static int CloneOpenbsd(int (*func)(void *, int), char *stk, size_t stksz, + int flags, void *arg, void *tls, size_t tlssz, + int *ptid, int *ctid) { int tid; intptr_t sp; struct __tfork *tf; @@ -297,13 +322,15 @@ static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags, sp -= sizeof(struct CloneArgs); sp &= -MAX(16, alignof(struct CloneArgs)); wt = (struct CloneArgs *)sp; + wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; + wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; - wt->pstack = __builtin_frame_address(0); + wt->oldrsp = __builtin_frame_address(0); wt->arg = arg; wt->func = func; tf->tf_stack = (char *)wt - 8; tf->tf_tcb = flags & CLONE_SETTLS ? tls : 0; - tf->tf_tid = flags & CLONE_CHILD_SETTID ? 
ctid : 0; + tf->tf_tid = &wt->tid; if ((tid = __tfork_thread(tf, sizeof(*tf), OpenbsdThreadMain, wt)) < 0) { errno = -tid; tid = -1; @@ -314,11 +341,17 @@ static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags, //////////////////////////////////////////////////////////////////////////////// // NET BESIYATA DISHMAYA -static wontreturn void NetbsdThreadMain(void *arg, int (*func)(void *arg), - int *tid, int *ctid, int *ztid) { +static wontreturn void NetbsdThreadMain(void *arg, // rdi + int (*func)(void *, int), // rsi + int *tid, // rdx + int *ctid, // rcx + int *ztid, // r8 + int *ptid) { // r9 int ax, dx; - *ctid = *tid; - func(arg); + ax = *tid; + *ptid = ax; + *ctid = ax; + func(arg, ax); // we no longer use the stack after this point // %eax = int __lwp_exit(void); asm volatile("movl\t$0,%2\n\t" // *wt->ztid = 0 @@ -330,8 +363,9 @@ static wontreturn void NetbsdThreadMain(void *arg, int (*func)(void *arg), unreachable; } -static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags, - void *arg, void *tls, size_t tlssz, int *ctid) { +static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz, + int flags, void *arg, void *tls, size_t tlssz, int *ptid, + int *ctid) { // NetBSD has its own clone() and it works, but it's technically a // second-class API, intended to help Linux folks migrate to this. 
bool failed; @@ -341,7 +375,6 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags, static int broken; struct ucontext_netbsd *ctx; static struct ucontext_netbsd netbsd_clone_template; - _Static_assert(sizeof(struct ucontext_netbsd) == 784, "fix assembly"); // memoize arbitrary valid processor state structure if (!once) { @@ -360,7 +393,7 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags, } sp = (intptr_t)(stk + stksz); - // allocate memory for child tid + // allocate memory for tid sp -= sizeof(int); sp = sp & -alignof(int); tid = (int *)sp; @@ -388,6 +421,7 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags, ctx->uc_mcontext.rdx = (intptr_t)tid; ctx->uc_mcontext.rcx = (intptr_t)(flags & CLONE_CHILD_SETTID ? ctid : tid); ctx->uc_mcontext.r8 = (intptr_t)(flags & CLONE_CHILD_CLEARTID ? ctid : tid); + ctx->uc_mcontext.r9 = (intptr_t)(flags & CLONE_PARENT_SETTID ? ptid : tid); ctx->uc_flags |= _UC_STACK; ctx->uc_stack.ss_sp = stk; ctx->uc_stack.ss_size = stksz; @@ -413,8 +447,28 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags, //////////////////////////////////////////////////////////////////////////////// // GNU/SYSTEMD -int sys_clone_linux(int flags, char *stk, int *ptid, int *ctid, void *tls, - int (*func)(void *), void *arg); +int sys_clone_linux(int flags, // rdi + long sp, // rsi + int *ptid, // rdx + int *ctid, // rcx + void *tls, // r8 + void *func, // r9 + void *arg); // 8(rsp) + +static int CloneLinux(int (*func)(void *arg, int tid), char *stk, size_t stksz, + int flags, void *arg, void *tls, size_t tlssz, int *ptid, + int *ctid) { + long sp; + sp = (intptr_t)(stk + stksz); + if (~flags & CLONE_CHILD_SETTID) { + flags |= CLONE_CHILD_SETTID; + sp -= sizeof(int); + sp = sp & -alignof(int); + ctid = (int *)sp; + } + sp = sp & -16; // align the stack + return sys_clone_linux(flags, sp, ptid, ctid, tls, func, arg); +} 
//////////////////////////////////////////////////////////////////////////////// // COSMOPOLITAN @@ -461,36 +515,69 @@ int sys_clone_linux(int flags, char *stk, int *ptid, int *ctid, void *tls, * other calls like getpid() may return incorrect values. * * @param func is your callback function, which this wrapper requires - * not be null, otherwise EINVAL is raised + * not be null, otherwise EINVAL is raised. It is passed two args + * within the child thread: (1) the caller-supplied `arg` and (2) + * the new tid is always passed in the second arg for convenience + * * @param stk points to the bottom of a caller allocated stack, which * must be allocated via mmap() using the MAP_STACK flag, or else * you won't get optimal performance and it won't work on OpenBSD + * * @param stksz is the size of that stack in bytes, we recommend that * that this be set to GetStackSize() or else memory safety tools * like kprintf() can't do as good and quick of a job; this value * must be 16-aligned plus it must be at least 4192 bytes in size * and it's advised to have the bottom-most page, be a guard page - * @param flags should have: - * - `CLONE_THREAD|CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND` - * and you may optionally bitwise or any of the following: - * - `CLONE_CHILD_SETTID` is needed too if you use `ctid` which - * is part of the memory the child owns and it'll be set right - * before the callback function is invoked - * - `CLONE_CHILD_CLEARTID` causes `*ctid = 0` upon termination - * which can be used to implement join so that the parent may - * safely free the stack memory that the child is using - * - `CLONE_PARENT_SETTID` is needed too if you use `ptid` and this - * is guaranteed to happen before clone() returns - * - `CLONE_SETTLS` is needed too if you set `tls`. You may get this - * value from the thread by calling __get_tls(). There are a few - * layout expectations imposed by your C library. 
Those are all - * documented by __initialize_tls() which initializes the parts of - * the first 64 bytes of tls memory that libc cares about. This - * flag will transition the C runtime to the `__tls_enabled` state - * automatically. If it's used for one thread, then it must be - * used for all threads. The first time it's used, it must be used - * from the main thread. - * @param arg will be passed to your callback + * + * @param flags which SHOULD always have all of these flags: + * + * - `CLONE_THREAD` + * - `CLONE_VM` + * - `CLONE_FS` + * - `CLONE_FILES` + * - `CLONE_SIGHAND` + * + * This system call wrapper is intended for threads, and as such, we + * won't polyfill Linux's ability to simulate unrelated calls (e.g. + * fork, vfork) via clone() on other platforms. Please just call + * fork() and vfork() when that's what you want. + * + * Your `flags` may additionally bitwise-OR any combination of the + * following optional flags: + * + * - `CLONE_PARENT_SETTID` must be specified if you intend to set + * the `ptid` argument, which is guaranteed to be updated with the + * child tid BEFORE BOTH clone() returns and `func` is invoked + * + * - `CLONE_CHILD_SETTID` must be specified if you intend to set the + * `ctid` argument, which is guaranteed to be updated with the + * child tid before `func` is called, however we CAN NOT guarantee + * this will happen BEFORE clone() returns + * + * - `CLONE_CHILD_CLEARTID` causes `*ctid = 0` upon child thread + * termination. This is used to implement join so that the parent + * may know when it's safe to free the child's stack memory, and + * as such, is guaranteed to happen AFTER the child thread has + * either terminated or has finished using its stack memory + * + * - `CLONE_SETTLS` is needed if you intend to specify the `tls` + * argument, which provides a fast-path solution for changing the + * appropriate TLS segment register within the child thread.
The + * child thread may then obtain a reference to the TIB address you + * supplied, by calling __get_tls(). Your C library holds certain + * expectations about the layout of your Thread Information Block + * (TIB), which are all documented by __initialize_tls(). That + * function can be used to initialize the first positive 64 bytes + * of your TLS allocation, which is the memory Cosmopolitan Libc + * wants for itself (and negative addresses are reserved by the + * GNU Linker). Using this flag will transition the C runtime to a + * `__tls_enabled` state automatically. If you use TLS for just + * one thread, then you must specify TLS for ALL THREADS. It's + * a good idea to do that since TLS can offer considerable (i.e. + * multiple orders of magnitude) performance improvement for + * TID-dependent C library services, e.g. recursive mutexes. + * + * @param arg is passed as an argument to `func` in the child thread * @param tls may be used to set the thread local storage segment; * this parameter is ignored if `CLONE_SETTLS` is not set * @param tlssz is the size of tls in bytes which must be at least 64 @@ -499,8 +586,8 @@ int sys_clone_linux(int flags, char *stk, int *ptid, int *ctid, void *tls, * @return tid of child on success, or -1 w/ errno * @threadsafe */ -int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg, - int *ptid, void *tls, size_t tlssz, int *ctid) { +int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid, + void *tls, size_t tlssz, int *ctid) { int rc; struct CloneArgs *wt; @@ -529,8 +616,7 @@ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg, !__asan_is_valid(ctid, sizeof(*ctid))))) { rc = efault(); } else if (IsLinux()) { - rc = - sys_clone_linux(flags, (char *)stk + stksz, ptid, ctid, tls, func, arg); + rc = CloneLinux(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid); } else if (!IsTiny() && (flags & ~(CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID)) != @@ -539,19 +625,20 @@ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg, STRACE("clone flag unsupported on this platform"); rc = einval(); } else if (IsXnu()) { - rc = CloneXnu(func, stk, stksz, flags, arg, tls, tlssz, ctid); + rc = CloneXnu(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid); } else if (IsFreebsd()) { - rc = CloneFreebsd(func, stk, stksz, flags, arg, tls, tlssz, ctid); + rc = CloneFreebsd(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid); } else if (IsNetbsd()) { - rc = CloneNetbsd(func, stk, stksz, flags, arg, tls, tlssz, ctid); + rc = CloneNetbsd(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid); } else if (IsOpenbsd()) { - rc = CloneOpenbsd(func, stk, stksz, flags, arg, tls, tlssz, ctid); + rc = CloneOpenbsd(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid); } else if (IsWindows()) { - rc = CloneWindows(func, stk, stksz, flags, arg, tls, tlssz, ctid); + rc = CloneWindows(func, stk, stksz, flags, arg, tls, tlssz, ptid, ctid); } else { rc = enosys(); } + // TODO(jart): do we need it? if (rc != -1 && (flags & CLONE_PARENT_SETTID)) { *ptid = rc; } diff --git a/libc/runtime/getsymboltable.c b/libc/runtime/getsymboltable.c index 90034f82b..e39dbf8a2 100644 --- a/libc/runtime/getsymboltable.c +++ b/libc/runtime/getsymboltable.c @@ -64,7 +64,7 @@ static struct SymbolTable *GetSymbolTableFromZip(struct Zipos *zipos) { lf = GetZipCfileOffset(zipos->map + cf); size = GetZipLfileUncompressedSize(zipos->map + lf); size2 = ROUNDUP(size, FRAMESIZE); - if ((res = mapanon(size2))) { + if ((res = _mapanon(size2))) { switch (ZIP_LFILE_COMPRESSIONMETHOD(zipos->map + lf)) { case kZipCompressionNone: memcpy(res, (void *)ZIP_LFILE_CONTENT(zipos->map + lf), size); diff --git a/libc/runtime/mapanon.c b/libc/runtime/mapanon.c index 6b31c87b6..1291a5ec8 100644 --- a/libc/runtime/mapanon.c +++ b/libc/runtime/mapanon.c @@ -54,13 +54,16 @@ * } * * That is performed automatically for unit test executables. 
+ * + * @return memory map address on success, or null w/ errrno */ -noasan void *mapanon(size_t size) { +void *_mapanon(size_t size) { /* asan runtime depends on this function */ void *m; m = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (m == MAP_FAILED && weaken(__oom_hook)) { weaken(__oom_hook)(size); + return 0; } return m; } diff --git a/test/libc/intrin/tls_test.c b/libc/runtime/mapstack.c similarity index 76% rename from test/libc/intrin/tls_test.c rename to libc/runtime/mapstack.c index ebfcb6df6..9b8e0874d 100644 --- a/test/libc/intrin/tls_test.c +++ b/libc/runtime/mapstack.c @@ -16,25 +16,26 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/calls/calls.h" -#include "libc/errno.h" -#include "libc/nexgen32e/gettls.h" -#include "libc/nexgen32e/threaded.h" #include "libc/runtime/runtime.h" -#include "libc/testlib/testlib.h" +#include "libc/runtime/stack.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/prot.h" -static char tib[64]; - -TEST(tls, test) { - errno = 31337; - EXPECT_EQ(31337, errno); - EXPECT_EQ(&__errno, __errno_location()); - __initialize_tls(tib); - *(int *)((char *)tib + 0x38) = gettid(); - *(int *)((char *)tib + 0x3c) = __errno; - __install_tls(tib); - EXPECT_EQ(31337, errno); - EXPECT_EQ(tib, __get_tls()); - EXPECT_EQ(tib, __get_tls_inline()); - EXPECT_EQ(tib + 0x3c, (char *)__errno_location()); +/** + * Allocates stack. + * + * @return stack bottom address on success, or null w/ errrno + */ +void *_mapstack(void) { + return mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, + MAP_STACK | MAP_ANONYMOUS, -1, 0); +} + +/** + * Frees stack. 
+ * + * @param stk was allocated by _mapstack() + */ +int _freestack(void *stk) { + return munmap(stk, GetStackSize()); } diff --git a/libc/runtime/runtime.h b/libc/runtime/runtime.h index 75b74683e..72cfc2417 100644 --- a/libc/runtime/runtime.h +++ b/libc/runtime/runtime.h @@ -6,7 +6,7 @@ COSMOPOLITAN_C_START_ │ cosmopolitan § runtime ─╬─│┼ ╚────────────────────────────────────────────────────────────────────────────│*/ -typedef long jmp_buf[8] forcealign(CACHELINE); +typedef long jmp_buf[8]; extern char **environ; /* CRT */ extern int __argc; /* CRT */ @@ -45,8 +45,10 @@ extern size_t __virtualmax; extern bool __isworker; void mcount(void); +int _freestack(void *); unsigned long getauxval(unsigned long); -void *mapanon(size_t) attributeallocsize((1)); +void *_mapanon(size_t) attributeallocsize((1)) mallocesque; +void *_mapstack(void) returnsaligned((FRAMESIZE)) mallocesque; int setjmp(jmp_buf) libcesque returnstwice paramsnonnull(); void longjmp(jmp_buf, int) libcesque wontreturn paramsnonnull(); axdx_t setlongerjmp(jmp_buf) libcesque returnstwice paramsnonnull(); diff --git a/libc/runtime/threadmode.c b/libc/runtime/threadmode.c index a0fdb935c..58da18e11 100644 --- a/libc/runtime/threadmode.c +++ b/libc/runtime/threadmode.c @@ -16,29 +16,206 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/assert.h" +#include "libc/bits/bits.h" #include "libc/calls/calls.h" #include "libc/calls/syscall-sysv.internal.h" +#include "libc/dce.h" #include "libc/errno.h" +#include "libc/intrin/kprintf.h" +#include "libc/macros.internal.h" #include "libc/nexgen32e/threaded.h" +#include "libc/nt/thread.h" +#include "libc/nt/thunk/msabi.h" #include "libc/runtime/internal.h" #include "libc/runtime/runtime.h" +#include "libc/str/str.h" +#include "libc/sysv/consts/nrlinux.h" +#include "libc/thread/thread.h" +#include "third_party/xed/x86.h" + +#define __NR_sysarch 0x000000a5 // freebsd+netbsd +#define AMD64_SET_GSBASE 131 // freebsd +#define AMD64_SET_FSBASE 129 // freebsd +#define X86_SET_GSBASE 16 // netbsd +#define X86_SET_FSBASE 17 // netbsd + +#define __NR___set_tcb 0x00000149 +#define __NR__lwp_setprivate 0x0000013d +#define __NR_thread_fast_set_cthread_self 0x03000003 + +#define _TLSZ ((intptr_t)_tls_size) +#define _TLDZ ((intptr_t)_tdata_size) +#define _TIBZ sizeof(struct cthread_descriptor_t) -static char tibdefault[64]; extern int __threadcalls_end[]; extern int __threadcalls_start[]; +extern unsigned char __get_tls_nt_rax[]; +__msabi extern typeof(TlsAlloc) *const __imp_TlsAlloc; -void __enable_tls(void) { - __initialize_tls(tibdefault); - *(int *)((char *)tibdefault + 0x38) = sys_gettid(); - *(int *)((char *)tibdefault + 0x3c) = __errno; - __install_tls(tibdefault); +privileged void __enable_tls(void) { + assert(!__threaded); + assert(!__tls_enabled); + + // allocate tls memory for main process + // + // %fs Linux/BSDs + // │ + // _Thread_local │ __get_tls() + // ┌───┬──────────┬──────────┼───┐ + // │pad│ .tdata │ .tbss │tib│ + // └───┴──────────┴──────────┼───┘ + // │ + // Windows/Mac %gs + // + size_t siz; + cthread_t tib; + char *mem, *tls; + siz = ROUNDUP(_TLSZ + _TIBZ, FRAMESIZE); + mem = _mapanon(siz); + tib = (cthread_t)(mem + siz - _TIBZ); + tls = mem + siz - 
_TIBZ - _TLSZ; + tib->self = tib; + tib->self2 = tib; + tib->err = __errno; + tib->tid = sys_gettid(); + memmove(tls, _tdata_start, _TLDZ); + + // ask the operating system to change the x86 segment register + int ax, dx; + if (IsWindows()) { + __tls_index = __imp_TlsAlloc(); + asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib)); + } else if (IsFreebsd()) { + asm volatile("syscall" + : "=a"(ax) + : "0"(__NR_sysarch), "D"(AMD64_SET_FSBASE), "S"(tib) + : "rcx", "r11", "memory", "cc"); + } else if (IsNetbsd()) { + asm volatile("syscall" + : "=a"(ax), "=d"(dx) + : "0"(__NR_sysarch), "D"(X86_SET_FSBASE), "S"(tib) + : "rcx", "r11", "memory", "cc"); + } else if (IsXnu()) { + asm volatile("syscall" + : "=a"(ax) + : "0"(__NR_thread_fast_set_cthread_self), + "D"((intptr_t)tib - 0x30) + : "rcx", "r11", "memory", "cc"); + } else if (IsOpenbsd()) { + asm volatile("syscall" + : "=a"(ax) + : "0"(__NR___set_tcb), "D"(tib) + : "rcx", "r11", "memory", "cc"); + } else { + asm volatile("syscall" + : "=a"(ax) + : "0"(__NR_linux_arch_prctl), "D"(ARCH_SET_FS), "S"(tib) + : "rcx", "r11", "memory"); + } + + /* + * We need to rewrite SysV _Thread_local code. You MUST use the + * -mno-tls-direct-seg-refs flag which generates code like this + * + * 64 48 8b 0R4 25 00 00 00 00 mov %fs:0,%R + * + * Which on Mac we can replace with this: + * + * 65 48 8b 0R4 25 30 00 00 00 mov %gs:0x30,%R + * + * Whereas on Windows we'll replace it with this: + * + * 0f 1f 40 00 fatnop4 + * e8 xx xx xx xx call __get_tls_nt_%R + * + * Since we have no idea where the TLS instructions exist in the + * binary, we need to disassemble the whole program image. This'll + * potentially take a few milliseconds for some larger programs. 
+ * + * TODO(jart): compute probability this is just overkill + */ + if (IsWindows() || IsXnu()) { + int n, reg, dis; + unsigned char *p; + struct XedDecodedInst xedd; + __morph_begin(); + + // The most expensive part of this process is we need to compute the + // byte length of each instruction in our program. We'll use Intel's + // disassembler for this purpose. + for (p = _ereal; p < __privileged_start; p += n) { + xed_decoded_inst_zero_set_mode(&xedd, XED_MACHINE_MODE_LONG_64); + if (!xed_instruction_length_decode(&xedd, p, 15)) { + + // We now know p[0] is most likely the first byte of an x86 op. + // Let's check and see if it's the GCC linear TIB address load. + // We hope and pray GCC won't generate TLS stores to %r8..%r15. + if (xedd.length == 9 && // + 0144 == p[0] && // fs + 0110 == p[1] && // rex.w (64-bit operand size) + 0213 == p[2] && // mov reg/mem → reg (word-sized) + 0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg + 0045 == p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32 + 0000 == p[5] && // displacement (von Neumann endian) + 0000 == p[6] && // displacement + 0000 == p[7] && // displacement + 0000 == p[8]) { // displacement + + // Apple is quite straightforward to patch. We basically + // just change the segment register, and the linear slot + if (IsXnu()) { + p[0] = 0145; // this changes fs segment to gs segment + p[5] = 0x30; // tib slot index for tib linear address + } + + // Windows is kind of complicated. We need to replace the + // segment mov instruction with a function call, that (a) + // won't clobber registers, and (b) has a return register + // that's the same as the mov destination. When setting + // function displacement, &CALL+5+DISP must equal &FUNC.
+ else { + reg = (p[3] & 070) >> 3; + dis = (__get_tls_nt_rax + reg * 18) - (p + 9); + p[0] = 0017; // map1 + p[1] = 0037; // nopl (only if reg=0) + p[2] = 0100; // mod/rm (%rax)+disp8 + p[3] = 0000; // displacement + p[4] = 0350; // call + p[5] = (dis & 0x000000ff) >> 000; // displacement + p[6] = (dis & 0x0000ff00) >> 010; // displacement + p[7] = (dis & 0x00ff0000) >> 020; // displacement + p[8] = (dis & 0xff000000) >> 030; // displacement + } + } + + // Move to the next instruction. + n = xedd.length; + } else { + // If Xed failed to decode the instruction, then we'll just plow + // through memory one byte at a time until Xed's morale improves + n = 1; + } + } + + __morph_end(); + } + + // we are now allowed to use tls + __tls_enabled = true; } privileged void __enable_threads(void) { + assert(!__threaded); __threaded = gettid(); + __morph_begin(); /* * _NOPL("__threadcalls", func) * + * The big ugly macro above is used by Cosmopolitan Libc to ensure + * locking primitives (e.g. flockfile, funlockfile) have zero impact on + * performance and binary size when threads aren't actually in play. + * * we have this * * 0f 1f 05 b1 19 00 00 nopl func(%rip) @@ -46,8 +223,10 @@ privileged void __enable_threads(void) { * we're going to turn it into this * * 67 67 e8 b1 19 00 00 addr32 addr32 call func + * + * This is cheap and fast because the big ugly macro stored in the + * binary the offsets of all the instructions we need to change. */ - __morph_begin(); for (int *p = __threadcalls_start; p < __threadcalls_end; ++p) { _base[*p + 0] = 0x67; _base[*p + 1] = 0x67; diff --git a/libc/runtime/winthreadlaunch.S b/libc/runtime/winthreadlaunch.S index fc86864be..6bcee3c23 100644 --- a/libc/runtime/winthreadlaunch.S +++ b/libc/runtime/winthreadlaunch.S @@ -26,8 +26,9 @@ // runtime facilities.
// // @param %rdi is arg -// @param %rsi is func -// @param %rdx is stack +// @param %rsi is tid +// @param %rdx is func +// @param %rcx is stack // @return %rax is exit code // @see clone() WinThreadLaunch: @@ -35,9 +36,9 @@ WinThreadLaunch: push %r15 mov %rbp,%r15 mov %rsp,%rbx - mov %rdx,%rsp + mov %rcx,%rsp xor %rbp,%rbp - call *%rsi + call *%rdx mov %r15,%rbp mov %rbx,%rsp pop %r15 diff --git a/libc/str/lz4decode.c b/libc/str/lz4decode.c index 2c1771761..c22205111 100644 --- a/libc/str/lz4decode.c +++ b/libc/str/lz4decode.c @@ -32,7 +32,7 @@ * they are passed in the ≤64kb bytes preceding src. * * @return pointer to end of decoded data, similar to mempcpy() - * @see mapanon(), lz4check() + * @see _mapanon(), lz4check() */ void *lz4decode(void *dest, const void *src) { const unsigned char *frame, *block; diff --git a/libc/sysv/gettls.greg.c b/libc/sysv/gettls.greg.c index ac5a057e5..b5dbd608c 100644 --- a/libc/sysv/gettls.greg.c +++ b/libc/sysv/gettls.greg.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/nexgen32e/gettls.h" +#include "libc/nexgen32e/threaded.h" /** * Returns address of thread information block. 
diff --git a/libc/sysv/sysv.mk b/libc/sysv/sysv.mk index 66d6f8e28..5c72daca5 100644 --- a/libc/sysv/sysv.mk +++ b/libc/sysv/sysv.mk @@ -40,6 +40,7 @@ LIBC_SYSV_A_FILES := \ libc/sysv/errno_location.greg.c \ libc/sysv/errno.c \ libc/sysv/gettls.greg.c \ + libc/sysv/tlspolyfill.S \ libc/sysv/errfun.S \ libc/sysv/strace.greg.c \ libc/sysv/describeos.greg.c \ diff --git a/libc/sysv/tlspolyfill.S b/libc/sysv/tlspolyfill.S new file mode 100644 index 000000000..f0c489e35 --- /dev/null +++ b/libc/sysv/tlspolyfill.S @@ -0,0 +1,90 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.internal.h" + +// Code morphing TLS polyfills for The New Technology. 
+// +// @note msvc generates this code so it's stable +// @note func ordering follows x86 reg encoding +// @note each function is exactly 18 bytes +// @see __enable_threads() + +__get_tls_nt_rax: + push %rcx + mov __tls_index(%rip),%ecx + mov %gs:0x1480(,%rcx,8),%rax + pop %rcx + ret + .endfn __get_tls_nt_rax,globl,hidden + +__get_tls_nt_rcx: + push %rax + mov __tls_index(%rip),%eax + mov %gs:0x1480(,%rax,8),%rcx + pop %rax + ret + .endfn __get_tls_nt_rcx + +__get_tls_nt_rdx: + push %rax + mov __tls_index(%rip),%eax + mov %gs:0x1480(,%rax,8),%rdx + pop %rax + ret + .endfn __get_tls_nt_rdx + +__get_tls_nt_rbx: + push %rax + mov __tls_index(%rip),%eax + mov %gs:0x1480(,%rax,8),%rbx + pop %rax + ret + .endfn __get_tls_nt_rbx + +__get_tls_nt_rsp: + push %rax + mov __tls_index(%rip),%eax + mov %gs:0x1480(,%rax,8),%rsp + pop %rax + ret + .endfn __get_tls_nt_rsp + +__get_tls_nt_rbp: + push %rax + mov __tls_index(%rip),%eax + mov %gs:0x1480(,%rax,8),%rbp + pop %rax + ret + .endfn __get_tls_nt_rbp + +__get_tls_nt_rsi: + push %rax + mov __tls_index(%rip),%eax + mov %gs:0x1480(,%rax,8),%rsi + pop %rax + ret + .endfn __get_tls_nt_rsi + +__get_tls_nt_rdi: + push %rax + mov __tls_index(%rip),%eax + mov %gs:0x1480(,%rax,8),%rdi + pop %rax + ret + .endfn __get_tls_nt_rdi diff --git a/libc/thread/create.c b/libc/thread/create.c deleted file mode 100644 index b01b1c7e2..000000000 --- a/libc/thread/create.c +++ /dev/null @@ -1,133 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. 
│ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/atomic.h" -#include "libc/calls/calls.h" -#include "libc/calls/strace.internal.h" -#include "libc/errno.h" -#include "libc/intrin/setjmp.internal.h" -#include "libc/macros.internal.h" -#include "libc/nexgen32e/threaded.h" -#include "libc/runtime/internal.h" -#include "libc/runtime/runtime.h" -#include "libc/str/str.h" -#include "libc/sysv/consts/clone.h" -#include "libc/sysv/consts/map.h" -#include "libc/sysv/consts/prot.h" -#include "libc/thread/internal.h" -#include "libc/thread/thread.h" - -STATIC_YOINK("_main_thread_ctor"); - -static cthread_t cthread_allocate(const cthread_attr_t *attr) { - char *mem; - size_t size; - cthread_t td; - size = ROUNDUP( - attr->stacksize + - ROUNDUP((uintptr_t)_tls_size + sizeof(struct cthread_descriptor_t), - PAGESIZE), - FRAMESIZE); - mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_STACK | MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) return 0; - if (attr->guardsize > PAGESIZE) { - mprotect(mem, attr->guardsize, PROT_NONE); - } - td = (cthread_t)(mem + size - sizeof(struct cthread_descriptor_t)); - td->self = td; - td->self2 = td; - td->err = errno; - td->tid = -1; - td->stack.bottom = mem; - td->stack.top = mem + attr->stacksize; - td->alloc.bottom = mem; - td->alloc.top = mem + size; - if (attr->mode & CTHREAD_CREATE_DETACHED) { - td->state = cthread_detached; - } else { - td->state = cthread_started; 
- } - // Initialize TLS with content of .tdata section - memmove((void *)((intptr_t)td - (intptr_t)_tls_size), _tdata_start, - (intptr_t)_tdata_size); - return td; -} - -static int cthread_start(void *arg) { - axdx_t rc; - void *exitcode; - cthread_t td = arg; - if (!(rc = setlongerjmp(td->exiter)).ax) { - exitcode = td->func(td->arg); - } else { - exitcode = (void *)rc.dx; - } - td->exitcode = exitcode; - _pthread_key_destruct(td->key); - if (atomic_load(&td->state) & cthread_detached) { - // we're still using the stack - // thus we can't munmap it yet - // kick the can down the road! - cthread_zombies_add(td); - } - atomic_fetch_add(&td->state, cthread_finished); - return 0; -} - -/** - * Creates thread. - * - * @param ptd will receive pointer to new thread descriptor - * @param attr contains special configuration if non-null - * @param func is thread callback function - * @param arg is argument supplied to `func` - * @return 0 on success, or error number on failure - * @threadsafe - */ -int cthread_create(cthread_t *ptd, const cthread_attr_t *attr, - void *(*func)(void *), void *arg) { - int rc, tid; - cthread_t td; - cthread_attr_t default_attr; - __threaded = true; - cthread_zombies_reap(); - cthread_attr_init(&default_attr); - if ((td = cthread_allocate(attr ? attr : &default_attr))) { - td->func = func; - td->arg = arg; - cthread_attr_destroy(&default_attr); - tid = - clone(cthread_start, td->stack.bottom, td->stack.top - td->stack.bottom, - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID, - td, 0, td, sizeof(struct cthread_descriptor_t), &td->tid); - if (tid != -1) { - *ptd = td; - rc = 0; - } else { - rc = errno; - munmap(td->alloc.bottom, td->alloc.top - td->alloc.bottom); - } - } else { - rc = errno; - tid = -1; - } - STRACE("cthread_create([%d], %p, %p, %p) → %s", tid, attr, func, arg, - !rc ? 
"0" : strerrno(rc)); - return rc; -} diff --git a/libc/thread/ctor.S b/libc/thread/ctor.S index d8fddefc8..8139b3d87 100644 --- a/libc/thread/ctor.S +++ b/libc/thread/ctor.S @@ -21,7 +21,7 @@ .init.start 400,_main_thread_ctor push %rdi push %rsi - call _main_thread_init + call __enable_tls pop %rsi pop %rdi .init.end 400,_main_thread_ctor diff --git a/libc/thread/join.c b/libc/thread/join.c deleted file mode 100644 index 9f8c55d22..000000000 --- a/libc/thread/join.c +++ /dev/null @@ -1,72 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/atomic.h" -#include "libc/calls/calls.h" -#include "libc/calls/strace.internal.h" -#include "libc/dce.h" -#include "libc/errno.h" -#include "libc/intrin/asan.internal.h" -#include "libc/runtime/runtime.h" -#include "libc/str/str.h" -#include "libc/sysv/consts/futex.h" -#include "libc/sysv/consts/nr.h" -#include "libc/thread/thread.h" - -/** - * Waits for thread to terminate and frees its memory. - * - * @param td is thread descriptor memory - * @param exitcode optionally receives value returned by thread - * @return 0 on success, or error number on failure - * @raises EDEADLK when trying to join this thread - * @raises EINVAL if another thread is joining - * @raises ESRCH if no such thread exists - * @raises EINVAL if not joinable - * @threadsafe - */ -int cthread_join(cthread_t td, void **exitcode) { - int x, rc, tid; - // otherwise, tid could be set to 0 even though `state` is not - // finished mark thread as joining - if (!td || (IsAsan() && !__asan_is_valid(td, sizeof(*td)))) { - rc = ESRCH; - tid = -1; - } else if ((tid = td->tid) == gettid()) { // tid must load before lock xadd - rc = EDEADLK; - } else if (atomic_load(&td->state) & (cthread_detached | cthread_joining)) { - rc = EINVAL; - } else { - if (~atomic_fetch_add(&td->state, cthread_joining) & cthread_finished) { - while ((x = atomic_load(&td->tid))) { - cthread_memory_wait32(&td->tid, x, 0); - } - } - if (exitcode) { - *exitcode = td->exitcode; - } - if (!munmap(td->alloc.bottom, td->alloc.top - td->alloc.bottom)) { - rc = 0; - } else { - rc = errno; - } - } - STRACE("cthread_join(%d, [%p]) → %s", tid, !rc && exitcode ? *exitcode : 0, - !rc ? 
"0" : strerrno(rc)); - return rc; -} diff --git a/libc/thread/init.c b/libc/thread/mktls.c similarity index 56% rename from libc/thread/init.c rename to libc/thread/mktls.c index c4da0ceff..c6f1498c5 100644 --- a/libc/thread/init.c +++ b/libc/thread/mktls.c @@ -1,7 +1,7 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ @@ -16,53 +16,41 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/assert.h" -#include "libc/calls/calls.h" -#include "libc/errno.h" #include "libc/macros.internal.h" -#include "libc/nexgen32e/threaded.h" +#include "libc/mem/mem.h" #include "libc/runtime/internal.h" #include "libc/runtime/runtime.h" -#include "libc/runtime/stack.h" #include "libc/str/str.h" -#include "libc/sysv/consts/map.h" -#include "libc/sysv/consts/prot.h" +#include "libc/thread/spawn.h" #include "libc/thread/thread.h" -textstartup void _main_thread_init(void) { - _Static_assert(offsetof(struct cthread_descriptor_t, self) == 0x00, ""); - _Static_assert(offsetof(struct cthread_descriptor_t, self2) == 0x30, ""); - _Static_assert(offsetof(struct cthread_descriptor_t, tid) == 0x38, ""); - _Static_assert(offsetof(struct cthread_descriptor_t, err) == 0x3c, ""); - cthread_t td; - size_t totalsize; - char *mem, *bottom, *top; +#define _TLSZ ((intptr_t)_tls_size) +#define _TLDZ ((intptr_t)_tdata_size) +#define _TIBZ sizeof(struct cthread_descriptor_t) +#define _MEMZ ROUNDUP(_TLSZ + _TIBZ, alignof(struct cthread_descriptor_t)) 
- totalsize = ROUNDUP( - (uintptr_t)_tls_size + sizeof(struct cthread_descriptor_t), FRAMESIZE); +/** + * Allocates thread-local storage memory for new thread. + * @return buffer that must be released with free() + */ +char *_mktls(char **out_tib) { + char *tls; + cthread_t tib; - mem = mmap(0, totalsize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, - -1, 0); - assert(mem != MAP_FAILED); + // Allocate enough TLS memory for all the GNU Linker (_tls_size) + // organized _Thread_local data, as well as Cosmopolitan Libc (64) + if (!(tls = calloc(1, _MEMZ))) return 0; - bottom = mem; - top = mem + totalsize; + // set up thread information block + tib = (cthread_t)(tls + _MEMZ - _TIBZ); + tib->self = tib; + tib->self2 = tib; + tib->err = 0; + tib->tid = -1; + memmove(tls, _tdata_start, _TLDZ); - td = (cthread_t)(top - sizeof(struct cthread_descriptor_t)); - td->self = td; - td->self2 = td; - td->err = errno; - td->tid = gettid(); - td->alloc.bottom = bottom; - td->alloc.top = top; - td->stack.bottom = GetStackAddr(0); - td->stack.top = td->stack.bottom + GetStackSize(); - td->state = cthread_main; - - // Initialize TLS with content of .tdata section - memmove((void *)((uintptr_t)td - (uintptr_t)_tls_size), _tdata_start, - (uintptr_t)_tdata_size); - - // Set FS - __install_tls((char *)td); + if (out_tib) { + *out_tib = (char *)tib; + } + return tls; } diff --git a/libc/thread/spawn.c b/libc/thread/spawn.c new file mode 100644 index 000000000..a051b84fd --- /dev/null +++ b/libc/thread/spawn.c @@ -0,0 +1,113 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this
permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/calls.h" +#include "libc/intrin/kprintf.h" +#include "libc/intrin/wait0.internal.h" +#include "libc/macros.internal.h" +#include "libc/mem/mem.h" +#include "libc/nexgen32e/threaded.h" +#include "libc/runtime/internal.h" +#include "libc/runtime/runtime.h" +#include "libc/runtime/stack.h" +#include "libc/str/str.h" +#include "libc/sysv/consts/clone.h" +#include "libc/sysv/consts/map.h" +#include "libc/sysv/consts/prot.h" +#include "libc/thread/spawn.h" +#include "libc/thread/thread.h" + +STATIC_YOINK("_main_thread_ctor"); + +/** + * @fileoverview Simple System Threads API + */ + +#define _TLSZ ((intptr_t)_tls_size) +#define _TLDZ ((intptr_t)_tdata_size) +#define _TIBZ sizeof(struct cthread_descriptor_t) +#define _MEMZ ROUNDUP(_TLSZ + _TIBZ, alignof(struct cthread_descriptor_t)) + +/** + * Spawns thread. 
+ * + * @param fun is thread worker callback, which receives `arg` and `ctid` + * @param arg shall be passed to `fun` + * @param opt_out_thread needn't be initialized and is always clobbered + * except when it isn't specified, in which case, the thread is kind + * of detached and will leak in stack / tls memory + * @return 0 on success, or -1 w/ errno + */ +int _spawn(int fun(void *, int), void *arg, struct spawn *opt_out_thread) { + struct spawn *th, ths; + + // we need to clobber the output memory before calling clone, since + // there's no guarantee clone() won't suspend the parent, and focus on + // running the child instead; in that case child might want to read it + if (opt_out_thread) { + th = opt_out_thread; + } else { + th = &ths; + } + + // Allocate enough TLS memory for all the GNU Linker (_tls_size) + // organized _Thread_local data, as well as Cosmopolitan Libc (64) + if (!(th->tls = _mktls(&th->tib))) { + return -1; + } + th->ctid = (int *)(th->tib + 0x38); + + // We must use _mapstack() to allocate the stack because OpenBSD has + // very strict requirements for what's allowed to be used for stacks + if (!(th->stk = _mapstack())) { + free(th->tls); + return -1; + } + + if (clone(fun, th->stk, GetStackSize(), + CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | + CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | + CLONE_CHILD_CLEARTID, + arg, &th->ptid, th->tib, _TIBZ, th->ctid) == -1) { + _freestack(th->stk); + free(th->tls); + return -1; + } + + return 0; +} + +/** + * Waits for thread created by _spawn() to terminate. + * + * This will free your thread's stack and tls memory too.
+ */ +int _join(struct spawn *th) { + int rc; + if (th->ctid) { + // wait for ctid to become zero + _wait0(th->ctid); + // free thread memory + free(th->tls); + rc = munmap(th->stk, GetStackSize()); + } else { + rc = 0; + } + bzero(th, sizeof(*th)); + return rc; +} diff --git a/libc/thread/spawn.h b/libc/thread/spawn.h new file mode 100644 index 000000000..0c9bf7861 --- /dev/null +++ b/libc/thread/spawn.h @@ -0,0 +1,20 @@ +#ifndef COSMOPOLITAN_LIBC_THREAD_SPAWN_H_ +#define COSMOPOLITAN_LIBC_THREAD_SPAWN_H_ +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +struct spawn { + int ptid; + int *ctid; + char *stk; + char *tls; + char *tib; +}; + +int _spawn(int (*)(void *, int), void *, struct spawn *) hidden; +int _join(struct spawn *) hidden; +char *_mktls(char **) hidden; + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_LIBC_THREAD_SPAWN_H_ */ diff --git a/libc/thread/thread.h b/libc/thread/thread.h index 757204a57..d8938c972 100644 --- a/libc/thread/thread.h +++ b/libc/thread/thread.h @@ -15,7 +15,6 @@ enum cthread_state { cthread_joining = 1, cthread_finished = 2, cthread_detached = 4, - cthread_main = 127, }; struct cthread_descriptor_t { diff --git a/test/libc/calls/reservefd_test.c b/test/libc/calls/reservefd_test.c index ace2bb5be..b574a4942 100644 --- a/test/libc/calls/reservefd_test.c +++ b/test/libc/calls/reservefd_test.c @@ -40,6 +40,7 @@ #include "libc/sysv/consts/sig.h" #include "libc/testlib/hyperion.h" #include "libc/testlib/testlib.h" +#include "libc/thread/spawn.h" #include "libc/time/time.h" STATIC_YOINK("zip_uri_support"); @@ -51,9 +52,6 @@ void PullSomeZipFilesIntoLinkage(void) { gmtime(0); } -char *stack[THREADS]; -char tls[THREADS][64]; - TEST(reservefd, testGrowthOfFdsDataStructure) { int i, n; struct rlimit rlim; @@ -87,7 +85,7 @@ void OnSigAlrm(int sig, siginfo_t *si, ucontext_t *ctx) { close(fd); // can eintr which doesn't matter } -int Worker(void *p) { +int Worker(void *p, int tid) 
{ char buf[64]; int i, rc, fd; for (i = 0; i < 64; ++i) { @@ -111,6 +109,7 @@ int Worker(void *p) { TEST(reservefd, tortureTest) { int i; + struct spawn th[THREADS]; struct sigaction oldsa; struct itimerval oldit; struct itimerval it = {{0, 10000}, {0, 100}}; @@ -119,17 +118,10 @@ TEST(reservefd, tortureTest) { // ASSERT_SYS(0, 0, sigaction(SIGALRM, &sa, &oldsa)); // ASSERT_SYS(0, 0, setitimer(ITIMER_REAL, &it, &oldit)); for (i = 0; i < THREADS; ++i) { - clone(Worker, - (stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0)), - GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS, - 0, 0, __initialize_tls(tls[i]), sizeof(tls[i]), - (int *)(tls[i] + 0x38)); + _spawn(Worker, 0, th + i); } for (i = 0; i < THREADS; ++i) { - _wait0((int *)(tls[i] + 0x38)); + _join(th + i); } // EXPECT_SYS(0, 0, sigaction(SIGALRM, &oldsa, 0)); // EXPECT_SYS(0, 0, setitimer(ITIMER_REAL, &oldit, 0)); diff --git a/test/libc/calls/test.mk b/test/libc/calls/test.mk index d9292a68a..56e81e23c 100644 --- a/test/libc/calls/test.mk +++ b/test/libc/calls/test.mk @@ -42,6 +42,7 @@ TEST_LIBC_CALLS_DIRECTDEPS = \ LIBC_STR \ LIBC_STUBS \ LIBC_SYSV \ + LIBC_THREAD \ LIBC_TIME \ LIBC_TESTLIB \ LIBC_UNICODE \ diff --git a/test/libc/intrin/kprintf_test.c b/test/libc/intrin/kprintf_test.c index 6ef0286d8..2457f9602 100644 --- a/test/libc/intrin/kprintf_test.c +++ b/test/libc/intrin/kprintf_test.c @@ -43,7 +43,8 @@ */ static uint64_t Rando(void) { uint64_t x; - do x = lemur64(); + do + x = lemur64(); while (((x ^ READ64LE("!!!!!!!!")) - 0x0101010101010101) & ~(x ^ READ64LE("!!!!!!!!")) & 0x8080808080808080); return x; @@ -279,7 +280,7 @@ TEST(ksnprintf, testMisalignedPointer_wontFormat) { TEST(ksnprintf, testUnterminatedOverrun_truncatesAtPageBoundary) { char *m; char b[32]; - m = memset(mapanon(FRAMESIZE * 2), 1, FRAMESIZE); + m = memset(_mapanon(FRAMESIZE * 2), 1, FRAMESIZE); 
EXPECT_SYS(0, 0, munmap(m + FRAMESIZE, FRAMESIZE)); EXPECT_EQ(12, ksnprintf(b, 32, "%'s", m + FRAMESIZE - 3)); EXPECT_STREQ("\\001\\001\\001", b); diff --git a/test/libc/intrin/pthread_mutex_lock_test.c b/test/libc/intrin/pthread_mutex_lock_test.c index 7afd56f78..8582c7684 100644 --- a/test/libc/intrin/pthread_mutex_lock_test.c +++ b/test/libc/intrin/pthread_mutex_lock_test.c @@ -41,32 +41,18 @@ #include "libc/sysv/consts/rlimit.h" #include "libc/testlib/ezbench.h" #include "libc/testlib/testlib.h" +#include "libc/thread/spawn.h" #include "libc/thread/thread.h" #define THREADS 8 #define ITERATIONS 512 -#define TLS_SIZE PAGESIZE - -char *tls[THREADS]; -char *stack[THREADS]; -_Alignas(PAGESIZE) char tlsdata[THREADS * 3][TLS_SIZE]; int count; _Atomic(int) started; _Atomic(int) finished; _Alignas(64) char slock; pthread_mutex_t mylock; - -__attribute__((__constructor__)) void init(void) { - int i; - __enable_tls(); - __enable_threads(); - for (i = 0; i < THREADS; ++i) { - CHECK_NE(-1, mprotect(tlsdata[i * 3 + 0], TLS_SIZE, PROT_NONE)); - tls[i] = tlsdata[i * 3 + 1]; - CHECK_NE(-1, mprotect(tlsdata[i * 3 + 2], TLS_SIZE, PROT_NONE)); - } -} +struct spawn th[THREADS]; TEST(pthread_mutex_lock, normal) { pthread_mutex_t lock; @@ -116,7 +102,7 @@ TEST(pthread_mutex_lock, errorcheck) { __assert_disable = false; } -int MutexWorker(void *p) { +int MutexWorker(void *p, int tid) { int i; ++started; for (i = 0; i < ITERATIONS; ++i) { @@ -124,7 +110,6 @@ int MutexWorker(void *p) { ++count; pthread_mutex_unlock(&mylock); } - ASSERT_NE(0, (int *)(tls[(intptr_t)p] + 0x38)); ++finished; return 0; } @@ -140,29 +125,14 @@ TEST(pthread_mutex_lock, contention) { started = 0; finished = 0; for (i = 0; i < THREADS; ++i) { - ASSERT_NE(MAP_FAILED, - (stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0))); - ASSERT_NE(-1, clone(MutexWorker, stack[i], GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | 
CLONE_CHILD_SETTID | - CLONE_CHILD_CLEARTID | CLONE_SETTLS, - (void *)(intptr_t)i, 0, __initialize_tls(tls[i]), - TLS_SIZE, (int *)(tls[i] + 0x38))); + ASSERT_SYS(0, 0, _spawn(MutexWorker, (void *)(intptr_t)i, th + i)); } for (i = 0; i < THREADS; ++i) { - _wait0((int *)(tls[i] + 0x38)); - ASSERT_EQ(0, *(int *)(tls[i] + 0x38)); - } - for (i = 0; i < THREADS; ++i) { - ASSERT_EQ(0, *(int *)(tls[i] + 0x38)); + ASSERT_SYS(0, 0, _join(th + i)); } EXPECT_EQ(THREADS, started); EXPECT_EQ(THREADS, finished); EXPECT_EQ(THREADS * ITERATIONS, count); - for (i = 0; i < THREADS; ++i) { - ASSERT_SYS(0, 0, munmap(stack[i], GetStackSize())); - } EXPECT_EQ(0, pthread_mutex_destroy(&mylock)); } @@ -177,29 +147,14 @@ TEST(pthread_mutex_lock, rcontention) { started = 0; finished = 0; for (i = 0; i < THREADS; ++i) { - ASSERT_NE(MAP_FAILED, - (stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0))); - ASSERT_NE(-1, clone(MutexWorker, stack[i], GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_CHILD_SETTID | - CLONE_CHILD_CLEARTID | CLONE_SETTLS, - (void *)(intptr_t)i, 0, __initialize_tls(tls[i]), - TLS_SIZE, (int *)(tls[i] + 0x38))); + ASSERT_NE(-1, _spawn(MutexWorker, (void *)(intptr_t)i, th + i)); } for (i = 0; i < THREADS; ++i) { - _wait0((int *)(tls[i] + 0x38)); - ASSERT_EQ(0, *(int *)(tls[i] + 0x38)); - } - for (i = 0; i < THREADS; ++i) { - ASSERT_EQ(0, *(int *)(tls[i] + 0x38)); + _join(th + i); } EXPECT_EQ(THREADS, started); EXPECT_EQ(THREADS, finished); EXPECT_EQ(THREADS * ITERATIONS, count); - for (i = 0; i < THREADS; ++i) { - ASSERT_SYS(0, 0, munmap(stack[i], GetStackSize())); - } EXPECT_EQ(0, pthread_mutex_destroy(&mylock)); } @@ -214,33 +169,18 @@ TEST(pthread_mutex_lock, econtention) { started = 0; finished = 0; for (i = 0; i < THREADS; ++i) { - ASSERT_NE(MAP_FAILED, - (stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0))); - ASSERT_NE(-1, 
clone(MutexWorker, stack[i], GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_CHILD_SETTID | - CLONE_CHILD_CLEARTID | CLONE_SETTLS, - (void *)(intptr_t)i, 0, __initialize_tls(tls[i]), - TLS_SIZE, (int *)(tls[i] + 0x38))); + ASSERT_NE(-1, _spawn(MutexWorker, (void *)(intptr_t)i, th + i)); } for (i = 0; i < THREADS; ++i) { - _wait0((int *)(tls[i] + 0x38)); - ASSERT_EQ(0, *(int *)(tls[i] + 0x38)); - } - for (i = 0; i < THREADS; ++i) { - ASSERT_EQ(0, *(int *)(tls[i] + 0x38)); + _join(th + i); } EXPECT_EQ(THREADS, started); EXPECT_EQ(THREADS, finished); EXPECT_EQ(THREADS * ITERATIONS, count); - for (i = 0; i < THREADS; ++i) { - ASSERT_SYS(0, 0, munmap(stack[i], GetStackSize())); - } EXPECT_EQ(0, pthread_mutex_destroy(&mylock)); } -int SpinlockWorker(void *p) { +int SpinlockWorker(void *p, int tid) { int i; ++started; for (i = 0; i < ITERATIONS; ++i) { @@ -248,7 +188,6 @@ int SpinlockWorker(void *p) { ++count; _spunlock(&slock); } - ASSERT_NE(0, (int *)(tls[(intptr_t)p] + 0x38)); ++finished; return 0; } @@ -259,25 +198,14 @@ TEST(_spinlock, contention) { started = 0; finished = 0; for (i = 0; i < THREADS; ++i) { - ASSERT_NE(MAP_FAILED, - (stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0))); - ASSERT_NE(-1, clone(SpinlockWorker, stack[i], GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_CHILD_SETTID | - CLONE_CHILD_CLEARTID | CLONE_SETTLS, - (void *)(intptr_t)i, 0, __initialize_tls(tls[i]), - TLS_SIZE, (int *)(tls[i] + 0x38))); + ASSERT_NE(-1, _spawn(SpinlockWorker, (void *)(intptr_t)i, th + i)); } for (i = 0; i < THREADS; ++i) { - _wait0((int *)(tls[i] + 0x38)); + _join(th + i); } EXPECT_EQ(THREADS, started); EXPECT_EQ(THREADS, finished); EXPECT_EQ(THREADS * ITERATIONS, count); - for (i = 0; i < THREADS; ++i) { - ASSERT_SYS(0, 0, munmap(stack[i], GetStackSize())); - } } BENCH(pthread_mutex_lock, bench) { diff --git 
a/test/libc/intrin/test.mk b/test/libc/intrin/test.mk index 77e858ccd..d23ed237a 100644 --- a/test/libc/intrin/test.mk +++ b/test/libc/intrin/test.mk @@ -35,6 +35,7 @@ TEST_LIBC_INTRIN_DIRECTDEPS = \ LIBC_STR \ LIBC_STUBS \ LIBC_SYSV \ + LIBC_THREAD \ LIBC_TESTLIB \ LIBC_TINYMATH \ LIBC_UNICODE \ diff --git a/test/libc/nexgen32e/lz4decode_test.c b/test/libc/nexgen32e/lz4decode_test.c index 9d78ecadd..2b222c03b 100644 --- a/test/libc/nexgen32e/lz4decode_test.c +++ b/test/libc/nexgen32e/lz4decode_test.c @@ -78,7 +78,7 @@ TEST(lz4, zoneFileGmt) { size_t mapsize, gmtsize; char *mapping, *gmtdata; lz4decode((gmtdata = lz4decode( - (mapping = mapanon( + (mapping = _mapanon( (mapsize = roundup( LZ4_FRAME_BLOCKCONTENTSIZE(lz4check(dict.addr)) + (gmtsize = LZ4_FRAME_BLOCKCONTENTSIZE( diff --git a/test/libc/rand/rand64_test.c b/test/libc/rand/rand64_test.c index 48dbc5d31..f6b77a237 100644 --- a/test/libc/rand/rand64_test.c +++ b/test/libc/rand/rand64_test.c @@ -37,6 +37,7 @@ #include "libc/sysv/consts/sa.h" #include "libc/sysv/consts/sig.h" #include "libc/testlib/testlib.h" +#include "libc/thread/spawn.h" #include "libc/thread/thread.h" #include "libc/time/time.h" @@ -54,7 +55,7 @@ dontinline void Generate(int i) { A[i] = rand64(); } -int Thrasher(void *arg) { +int Thrasher(void *arg, int tid) { int i, id = (intptr_t)arg; while (!atomic_load(&ready)) { cthread_memory_wait32(&ready, 0, 0); @@ -83,9 +84,8 @@ TEST(rand64, testLcg_doesntProduceIdenticalValues) { TEST(rand64, testThreadSafety_doesntProduceIdenticalValues) { int i, j, rc, ws; sigset_t ss, oldss; - char *tls[THREADS]; - void *stacks[THREADS]; struct sigaction oldsa; + struct spawn th[THREADS]; struct sigaction sa = {.sa_handler = OnChld, .sa_flags = SA_RESTART}; EXPECT_NE(-1, sigaction(SIGCHLD, &sa, &oldsa)); bzero(A, sizeof(A)); @@ -94,25 +94,12 @@ TEST(rand64, testThreadSafety_doesntProduceIdenticalValues) { EXPECT_EQ(0, sigprocmask(SIG_BLOCK, &ss, &oldss)); ready = false; for (i = 0; i < THREADS; ++i) { - 
tls[i] = __initialize_tls(calloc(1, 64)); - stacks[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0); - ASSERT_NE( - -1, - clone(Thrasher, stacks[i], GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID, - (void *)(intptr_t)i, 0, tls[i], 64, (int *)(tls[i] + 0x38))); + ASSERT_SYS(0, 0, _spawn(Thrasher, (void *)(intptr_t)i, th + i)); } atomic_store(&ready, 1); cthread_memory_wake32(&ready, INT_MAX); for (i = 0; i < THREADS; ++i) { - while ((j = atomic_load((uint32_t *)(tls[i] + 0x38)))) { - // FUTEX_WAIT_PRIVATE makes it hang - cthread_memory_wait32((int *)(tls[i] + 0x38), j, 0); - } - EXPECT_SYS(0, 0, munmap(stacks[i], GetStackSize())); - free(tls[i]); + ASSERT_SYS(0, 0, _join(th + i)); } sigaction(SIGCHLD, &oldsa, 0); sigprocmask(SIG_BLOCK, &oldss, 0); diff --git a/test/libc/runtime/clone_test.c b/test/libc/runtime/clone_test.c index cd9d2c7b6..3b6b8b7d2 100644 --- a/test/libc/runtime/clone_test.c +++ b/test/libc/runtime/clone_test.c @@ -23,9 +23,11 @@ #include "libc/intrin/spinlock.h" #include "libc/intrin/wait0.internal.h" #include "libc/log/backtrace.internal.h" +#include "libc/macros.internal.h" #include "libc/mem/mem.h" #include "libc/nexgen32e/gettls.h" #include "libc/nexgen32e/nexgen32e.h" +#include "libc/runtime/internal.h" #include "libc/runtime/runtime.h" #include "libc/runtime/stack.h" #include "libc/runtime/symbols.internal.h" @@ -36,10 +38,10 @@ #include "libc/sysv/consts/sig.h" #include "libc/testlib/ezbench.h" #include "libc/testlib/testlib.h" +#include "libc/thread/spawn.h" #include "libc/time/time.h" -char *stack, *tls; -int x, me, tid, *childetid; +int x, me, tid; _Atomic(int) thechilde; __attribute__((__constructor__)) static void init(void) { @@ -47,47 +49,38 @@ __attribute__((__constructor__)) static void init(void) { errno = 0; } +void *__initialize_tls(char tib[64]) { + if (tib) { + *(intptr_t *)(tib + 0x00) = 
(intptr_t)tib; + *(intptr_t *)(tib + 0x30) = (intptr_t)tib; + *(int *)(tib + 0x38) = -1; // tid + *(int *)(tib + 0x3c) = 0; + } + return tib; +} + void SetUp(void) { x = 0; me = gettid(); - tls = calloc(1, 64); - __initialize_tls(tls); - *(int *)(tls + 0x3c) = 31337; - childetid = (int *)(tls + 0x38); - ASSERT_NE(MAP_FAILED, (stack = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0))); } void TearDown(void) { - EXPECT_SYS(0, 0, munmap(stack, GetStackSize())); - free(tls); } int DoNothing(void *arg) { return 0; } -//////////////////////////////////////////////////////////////////////////////// -// TEST ERROR NUMBERS - -TEST(clone, testNullFunc_raisesEinval) { - EXPECT_SYS(EINVAL, -1, - clone(0, stack, GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_SETTLS, - 0, 0, tls, 64, 0)); -} - //////////////////////////////////////////////////////////////////////////////// // TEST THREADS WORK -int CloneTest1(void *arg) { +int CloneTest1(void *arg, int tid) { intptr_t rsp, top, bot; CheckStackIsAligned(); // PrintBacktraceUsingSymbols(2, __builtin_frame_address(0), // GetSymbolTable()); rsp = (intptr_t)__builtin_frame_address(0); - bot = (intptr_t)stack; + bot = ROUNDDOWN((intptr_t)rsp, GetStackSize()); top = bot + GetStackSize(); ASSERT_GT(rsp, bot); // check we're on stack ASSERT_LT(rsp, top); // check we're on stack @@ -95,28 +88,16 @@ int CloneTest1(void *arg) { ASSERT_TRUE(IS2POW(GetStackSize())); ASSERT_EQ(0, bot & (GetStackSize() - 1)); x = 42; - if (!IsWindows()) { - ASSERT_EQ(31337, errno); - } else { - errno = 31337; - ASSERT_EQ(31337, errno); - } ASSERT_EQ(23, (intptr_t)arg); ASSERT_NE(gettid(), getpid()); - ASSERT_EQ(gettid(), *childetid); // CLONE_CHILD_SETTID return 0; } TEST(clone, test1) { int ptid = 0; - *childetid = -1; - ASSERT_NE(-1, (tid = clone(CloneTest1, stack, GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_PARENT_SETTID | - 
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | - CLONE_SETTLS, - (void *)23, &ptid, tls, 64, childetid))); - _wait0(childetid); // CLONE_CHILD_CLEARTID + struct spawn th; + ASSERT_SYS(0, 0, _spawn(CloneTest1, (void *)23, &th)); + ASSERT_SYS(0, 0, _join(&th)); ASSERT_NE(gettid(), tid); ASSERT_EQ(tid, ptid); ASSERT_EQ(42, x); @@ -132,7 +113,7 @@ TEST(clone, test1) { _Atomic(int) sysbarrier; -int CloneTestSys(void *arg) { +int CloneTestSys(void *arg, int tid) { int i, id = (intptr_t)arg; CheckStackIsAligned(); while (!sysbarrier) asm("pause"); @@ -165,25 +146,14 @@ int CloneTestSys(void *arg) { TEST(clone, tlsSystemCallsErrno_wontClobberMainThreadBecauseTls) { int i; - char *tls[8], *stack[8]; + struct spawn th[8]; ASSERT_EQ(0, errno); for (i = 0; i < 8; ++i) { - tls[i] = __initialize_tls(malloc(64)); - stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0); - ASSERT_NE( - -1, - (tid = clone( - CloneTestSys, stack[i], GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS, - (void *)(intptr_t)i, 0, tls[i], 64, (int *)(tls[i] + 0x38)))); + ASSERT_SYS(0, 0, _spawn(CloneTestSys, (void *)(intptr_t)i, th + i)); } sysbarrier = 1; for (i = 0; i < 8; ++i) { - _wait0((int *)(tls[i] + 0x38)); - free(tls[i]); - munmap(stack[i], GetStackSize()); + ASSERT_SYS(0, 0, _join(th + i)); } ASSERT_EQ(0, errno); } diff --git a/test/libc/runtime/mprotect_test.c b/test/libc/runtime/mprotect_test.c index 8064ac9a9..a18afae13 100644 --- a/test/libc/runtime/mprotect_test.c +++ b/test/libc/runtime/mprotect_test.c @@ -119,7 +119,7 @@ TEST(mprotect, testSegfault_writeToReadOnlyAnonymous) { } TEST(mprotect, testExecOnly_canExecute) { - char *p = mapanon(FRAMESIZE); + char *p = _mapanon(FRAMESIZE); void (*f)(void) = (void *)p; p[0] = 0xC3; // RET ASSERT_SYS(0, 0, mprotect(p, FRAMESIZE, PROT_EXEC | PROT_READ)); diff --git a/test/libc/runtime/test.mk b/test/libc/runtime/test.mk 
index 05c56c00f..38f4b581f 100644 --- a/test/libc/runtime/test.mk +++ b/test/libc/runtime/test.mk @@ -37,6 +37,7 @@ TEST_LIBC_RUNTIME_DIRECTDEPS = \ LIBC_STR \ LIBC_STUBS \ LIBC_SYSV \ + LIBC_THREAD \ LIBC_TESTLIB \ LIBC_TINYMATH \ LIBC_UNICODE \ diff --git a/test/libc/stdio/dtoa_test.c b/test/libc/stdio/dtoa_test.c index f1e8cbfd1..fb0b1953d 100644 --- a/test/libc/stdio/dtoa_test.c +++ b/test/libc/stdio/dtoa_test.c @@ -28,6 +28,7 @@ #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/prot.h" #include "libc/testlib/testlib.h" +#include "libc/thread/spawn.h" #include "libc/x/x.h" #define THREADS 32 @@ -46,10 +47,9 @@ union Dub { double x; }; -char *stack[THREADS]; -char tls[THREADS][64]; +struct spawn th[THREADS]; -int Worker(void *p) { +int Worker(void *p, int tid) { int i; char str[64]; for (i = 0; i < 256; ++i) { @@ -63,17 +63,10 @@ int Worker(void *p) { TEST(dtoa, test) { int i; for (i = 0; i < THREADS; ++i) { - clone(Worker, - (stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0)), - GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS, - 0, 0, __initialize_tls(tls[i]), sizeof(tls[i]), - (int *)(tls[i] + 0x38)); + _spawn(Worker, 0, th + i); } for (i = 0; i < THREADS; ++i) { - _wait0((int *)(tls[i] + 0x38)); + _join(th + i); } } diff --git a/test/libc/stdio/test.mk b/test/libc/stdio/test.mk index a171f7c91..04c1627ef 100644 --- a/test/libc/stdio/test.mk +++ b/test/libc/stdio/test.mk @@ -37,6 +37,7 @@ TEST_LIBC_STDIO_DIRECTDEPS = \ LIBC_SYSV \ LIBC_TINYMATH \ LIBC_TESTLIB \ + LIBC_THREAD \ LIBC_TIME \ LIBC_LOG \ LIBC_UNICODE \ diff --git a/test/libc/thread/create_test.c b/test/libc/thread/create_test.c deleted file mode 100644 index 6cdab1244..000000000 --- a/test/libc/thread/create_test.c +++ /dev/null @@ -1,100 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c 
ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/calls/calls.h" -#include "libc/dce.h" -#include "libc/errno.h" -#include "libc/runtime/internal.h" -#include "libc/runtime/runtime.h" -#include "libc/testlib/testlib.h" -#include "libc/thread/thread.h" -#include "libc/time/time.h" - -static _Thread_local int tdata = 31337; -static _Thread_local int tbss; - -static void *ReturnArg(void *arg) { - return arg; -} - -TEST(cthread_create, testJoinDeadlock) { - ASSERT_SYS(0, EDEADLK, cthread_join(cthread_self(), 0)); -} - -TEST(cthread_create, testCreateReturnJoin) { - if (IsOpenbsd()) return; // TODO(jart): flakes - void *exitcode; - cthread_t thread; - ASSERT_EQ(0, cthread_create(&thread, 0, ReturnArg, ReturnArg)); - ASSERT_EQ(0, cthread_join(thread, &exitcode)); - ASSERT_EQ(ReturnArg, exitcode); -} - -static void *ExitArg(void *arg) { - cthread_exit(arg); -} - -TEST(cthread_create, testCreateExitJoin) { - if (IsOpenbsd()) return; // TODO(jart): flakes - void *exitcode; - cthread_t thread; - ASSERT_EQ(0, 
cthread_create(&thread, 0, ExitArg, (void *)-31337)); - ASSERT_EQ(0, cthread_join(thread, &exitcode)); - ASSERT_EQ((void *)-31337, exitcode); -} - -TEST(gcctls, size) { - if (IsXnu()) return; // TODO(jart): codemorph - if (IsWindows()) return; // TODO(jart): codemorph - if (IsOpenbsd()) return; // TODO(jart): flakes - // schlep in .zip section too - // make sure executable isn't too huge - size_t size; - int64_t x = 0; - gmtime(&x); - ASSERT_LT((uintptr_t)_tls_size, 8192); - size = GetFileSize(GetProgramExecutableName()); - if (IsTiny()) { - ASSERT_LT(size, 200 * 1024); - } else if (IsModeDbg() || IsAsan()) { - ASSERT_LT(size, 4 * 1024 * 1024); - } else { - ASSERT_LT(size, 500 * 1024); - } -} - -static void *TlsWorker(void *arg) { - ASSERT_EQ(31337, tdata); - ASSERT_EQ(0, tbss); - return 0; -} - -TEST(gcctls, worksAndIsNonInheritable) { - if (IsXnu()) return; // TODO(jart): codemorph - if (IsWindows()) return; // TODO(jart): codemorph - if (IsOpenbsd()) return; // TODO(jart): flakes - void *exitcode; - cthread_t thread; - ASSERT_EQ(tdata, 31337); - ASSERT_EQ(tbss, 0); - tdata = 1337; - tbss = 1337; - ASSERT_EQ(0, cthread_create(&thread, 0, TlsWorker, (void *)-31337)); - ASSERT_EQ(0, cthread_join(thread, &exitcode)); - ASSERT_EQ(NULL, exitcode); -} diff --git a/test/libc/intrin/gettid_test.c b/test/libc/thread/dog.c similarity index 78% rename from test/libc/intrin/gettid_test.c rename to test/libc/thread/dog.c index 5f7bde5b8..184fb3c9b 100644 --- a/test/libc/intrin/gettid_test.c +++ b/test/libc/thread/dog.c @@ -16,22 +16,15 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/calls/calls.h" -#include "libc/dce.h" -#include "libc/nexgen32e/threaded.h" -#include "libc/testlib/ezbench.h" -#include "libc/testlib/testlib.h" -char tib[64]; +_Thread_local char x; +static _Thread_local char y; -TEST(gettid, test) { - if (IsLinux()) EXPECT_EQ(getpid(), gettid()); - if (IsNetbsd()) EXPECT_EQ(1, gettid()); +char ha(void) { + ++y; + return x; } -BENCH(gettid, bench) { - int gettid_(void) asm("gettid"); - EZBENCH2("gettid (single threaded)", donothing, gettid()); - __install_tls(__initialize_tls(tib)); - EZBENCH2("gettid (tls enabled)", donothing, gettid()); +char ya(void) { + return y; } diff --git a/tool/build/wastecpu.c b/test/libc/thread/spawn_test.c similarity index 53% rename from tool/build/wastecpu.c rename to test/libc/thread/spawn_test.c index fbb1d86e1..0a5d7b66e 100644 --- a/tool/build/wastecpu.c +++ b/test/libc/thread/spawn_test.c @@ -16,63 +16,42 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/calls/struct/sigaction.h" -#include "libc/intrin/spinlock.h" -#include "libc/intrin/wait0.internal.h" -#include "libc/log/log.h" -#include "libc/mem/mem.h" -#include "libc/nexgen32e/threaded.h" -#include "libc/runtime/gc.internal.h" -#include "libc/runtime/runtime.h" -#include "libc/runtime/stack.h" -#include "libc/runtime/sysconf.h" -#include "libc/sysv/consts/clone.h" -#include "libc/sysv/consts/map.h" -#include "libc/sysv/consts/prot.h" -#include "libc/sysv/consts/sig.h" -#include "libc/time/time.h" +#include "libc/assert.h" +#include "libc/bits/atomic.h" +#include "libc/calls/calls.h" +#include "libc/calls/syscall-sysv.internal.h" +#include "libc/intrin/kprintf.h" +#include "libc/macros.internal.h" +#include "libc/testlib/testlib.h" +#include "libc/thread/spawn.h" +#include "libc/thread/thread.h" -volatile bool gotctrlc; +#define N 128 -void GotCtrlC(int sig) { - gotctrlc = true; +struct spawn t[N]; +_Atomic(int) itworked; +_Thread_local int var; + +int Worker(void *arg, int tid) { + int i = (long)arg; + ASSERT_EQ(0, var++); + ASSERT_EQ(gettid(), tid); + ASSERT_EQ(1, var++); + ASSERT_EQ(sys_gettid(), tid); + ASSERT_EQ(2, var++); + itworked++; + return 0; } -int Worker(void *arg) { - uint8_t *p; - unsigned x = 0; - struct sigaction sa = {.sa_handler = GotCtrlC}; - sigaction(SIGINT, &sa, 0); - for (;;) { - for (p = _base; p < _end; ++p) { - x += *p; - if (gotctrlc) { - return x | x >> 8 | x >> 16 | x >> 24; - } - } - } +TEST(_spawn, test) { + long i; + for (i = 0; i < N; ++i) EXPECT_SYS(0, 0, _spawn(Worker, (void *)i, t + i)); + for (i = 0; i < N; ++i) EXPECT_SYS(0, 0, _join(t + i)); + for (i = 0; i < N; ++i) EXPECT_SYS(0, 0, _join(t + i)); + EXPECT_EQ(N, itworked); } -int main(int argc, char *argv[]) { - char **tls; - int i, n, prot, flags; - ShowCrashReports(); - n = GetCpuCount(); - tls = gc(malloc(n * sizeof(*tls))); - for (i = 0; i < n; ++i) { - prot = 
PROT_READ | PROT_WRITE; - flags = MAP_STACK | MAP_ANONYMOUS; - tls[i] = __initialize_tls(malloc(64)); - clone(Worker, mmap(0, GetStackSize(), prot, flags, -1, 0), GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS, - 0, 0, tls[i], 64, (int *)(tls[i] + 0x38)); - } - while (!gotctrlc) { - usleep(1000); - } - for (i = 0; i < n; ++i) { - _wait0((int *)(tls[i] + 0x38)); - free(tls[i]); - } +__attribute__((__constructor__)) static void init(void) { + pledge("stdio rpath thread", 0); + errno = 0; } diff --git a/third_party/python/Objects/obmalloc.c b/third_party/python/Objects/obmalloc.c index 93ec3d515..7f8942355 100644 --- a/third_party/python/Objects/obmalloc.c +++ b/third_party/python/Objects/obmalloc.c @@ -184,7 +184,7 @@ static void * _PyObject_ArenaMmap(void *ctx, size_t size) { #ifdef __COSMOPOLITAN__ - return mapanon(size); + return _mapanon(size); #else void *ptr; ptr = mmap(NULL, size, PROT_READ|PROT_WRITE, diff --git a/tool/build/build.mk b/tool/build/build.mk index c1db55562..6cc7a0069 100644 --- a/tool/build/build.mk +++ b/tool/build/build.mk @@ -47,6 +47,7 @@ TOOL_BUILD_DIRECTDEPS = \ LIBC_SYSV \ LIBC_SYSV_CALLS \ LIBC_TIME \ + LIBC_THREAD \ LIBC_TINYMATH \ LIBC_UNICODE \ LIBC_X \ diff --git a/tool/build/mkdeps.c b/tool/build/mkdeps.c index eefb4dfa3..9bffb625d 100644 --- a/tool/build/mkdeps.c +++ b/tool/build/mkdeps.c @@ -52,6 +52,7 @@ #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/prot.h" +#include "libc/thread/spawn.h" #include "libc/time/time.h" #include "libc/x/x.h" #include "third_party/getopt/getopt.h" @@ -122,11 +123,10 @@ struct Edges { }; char *out; -char **tls; int threads; char **bouts; -char **stack; unsigned counter; +struct spawn *th; struct GetArgs ga; struct Edges edges; struct Sauce *sauces; @@ -248,7 +248,7 @@ wontreturn void OnMissingFile(const char *list, const char *src) { exit(1); } -int 
LoadRelationshipsWorker(void *arg) { +int LoadRelationshipsWorker(void *arg, int tid) { int fd; ssize_t rc; bool skipme; @@ -307,18 +307,14 @@ void LoadRelationships(int argc, char *argv[]) { int i; getargs_init(&ga, argv + optind); for (i = 0; i < threads; ++i) { - if (clone(LoadRelationshipsWorker, stack[i], GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID, - (void *)(intptr_t)i, 0, __initialize_tls(tls[i]), 64, - (int *)(tls[i] + 0x38)) == -1) { + if (_spawn(LoadRelationshipsWorker, (void *)(intptr_t)i, th + i) == -1) { pthread_mutex_lock(&reportlock); kprintf("error: clone(%d) failed %m\n", i); exit(1); } } for (i = 0; i < threads; ++i) { - _wait0((int *)(tls[i] + 0x38)); + _join(th + i); } getargs_destroy(&ga); } @@ -388,17 +384,17 @@ void Dive(char **bout, uint32_t *visited, unsigned id) { } } -int Diver(void *arg) { +int Diver(void *arg, int tid) { char *bout = 0; const char *path; uint32_t *visited; size_t i, visilen; char pathbuf[PATH_MAX]; - int tid = (intptr_t)arg; + int x = (intptr_t)arg; visilen = (sources.i + sizeof(*visited) * CHAR_BIT - 1) / (sizeof(*visited) * CHAR_BIT); visited = malloc(visilen * sizeof(*visited)); - for (i = tid; i < sources.i; i += threads) { + for (i = x; i < sources.i; i += threads) { path = strings.p + sauces[i].name; if (!IsObjectSource(path)) continue; appendw(&bout, '\n'); @@ -415,25 +411,21 @@ int Diver(void *arg) { } free(visited); appendw(&bout, '\n'); - bouts[tid] = bout; + bouts[x] = bout; return 0; } void Explore(void) { int i; for (i = 0; i < threads; ++i) { - if (clone(Diver, stack[i], GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID, - (void *)(intptr_t)i, 0, __initialize_tls(tls[i]), 64, - (int *)(tls[i] + 0x38)) == -1) { + if (_spawn(Diver, (void *)(intptr_t)i, th + i) == -1) { pthread_mutex_lock(&reportlock); kprintf("error: 
clone(%d) failed %m\n", i); exit(1); } } for (i = 0; i < threads; ++i) { - _wait0((int *)(tls[i] + 0x38)); + _join(th + i); } } @@ -443,17 +435,8 @@ int main(int argc, char *argv[]) { if (argc == 2 && !strcmp(argv[1], "-n")) exit(0); GetOpts(argc, argv); threads = GetCpuCount(); - tls = calloc(threads, sizeof(*tls)); - stack = calloc(threads, sizeof(*stack)); + th = calloc(threads, sizeof(*th)); bouts = calloc(threads, sizeof(*bouts)); - for (i = 0; i < threads; ++i) { - if (!(tls[i] = malloc(64)) || - (stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0)) == MAP_FAILED) { - kprintf("error: mmap(%d) failed %m\n", i); - exit(1); - } - } LoadRelationships(argc, argv); Crunch(); Explore(); @@ -466,15 +449,12 @@ int main(int argc, char *argv[]) { CHECK_NE(-1, close(fd)); CHECK_NE(-1, rename(path, out)); for (i = 0; i < threads; ++i) { - munmap(stack[i], GetStackSize()); free(bouts[i]); - free(tls[i]); } free(strings.p); free(edges.p); free(sauces); - free(stack); free(bouts); - free(tls); + free(th); return 0; } diff --git a/tool/net/net.mk b/tool/net/net.mk index f6a450d36..c6288fcc8 100644 --- a/tool/net/net.mk +++ b/tool/net/net.mk @@ -49,6 +49,7 @@ TOOL_NET_DIRECTDEPS = \ LIBC_SYSV \ LIBC_SYSV_CALLS \ LIBC_TIME \ + LIBC_THREAD \ LIBC_TINYMATH \ LIBC_UNICODE \ LIBC_X \ diff --git a/tool/net/redbean.c b/tool/net/redbean.c index f81db3d4f..79cf5c4f4 100644 --- a/tool/net/redbean.c +++ b/tool/net/redbean.c @@ -36,7 +36,6 @@ #include "libc/intrin/kprintf.h" #include "libc/intrin/nomultics.internal.h" #include "libc/intrin/spinlock.h" -#include "libc/intrin/wait0.internal.h" #include "libc/log/check.h" #include "libc/log/log.h" #include "libc/macros.internal.h" @@ -86,6 +85,7 @@ #include "libc/sysv/consts/termios.h" #include "libc/sysv/consts/w.h" #include "libc/sysv/errfuns.h" +#include "libc/thread/spawn.h" #include "libc/x/x.h" #include "libc/zip.h" #include "net/http/escape.h" @@ -421,7 +421,6 @@ static lua_State *GL; static 
lua_State *YL; static char *content; static uint8_t *zmap; -static char *repltls; static uint8_t *zbase; static uint8_t *zcdir; static size_t hdrsize; @@ -431,7 +430,6 @@ static char *replstack; static reader_f reader; static writer_f writer; static char *extrahdrs; -static char *monitortls; static char *luaheaderp; static const char *zpath; static const char *brand; @@ -454,6 +452,8 @@ static const char *launchbrowser; static const char *referrerpolicy; static ssize_t (*generator)(struct iovec[3]); +static struct spawn replth; +static struct spawn monitorth; static struct Buffer inbuf_actual; static struct Buffer inbuf; static struct Buffer oldin; @@ -6461,7 +6461,7 @@ static int ExitWorker(void) { } if (monitortty) { terminatemonitor = true; - _wait0((int *)(monitortls + 0x38)); + _join(&monitorth); } _Exit(0); } @@ -6482,7 +6482,7 @@ static int EnableSandbox(void) { } } -static int MemoryMonitor(void *arg) { +static int MemoryMonitor(void *arg, int tid) { static struct termios oldterm; static int tty; sigset_t ss; @@ -6637,23 +6637,9 @@ static int MemoryMonitor(void *arg) { } static void MonitorMemory(void) { - if ((monitortls = malloc(64))) { - if ((monitorstack = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0)) != MAP_FAILED) { - if (clone(MemoryMonitor, monitorstack, GetStackSize(), - CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_SETTLS | CLONE_CHILD_SETTID | - CLONE_CHILD_CLEARTID, - 0, 0, __initialize_tls(monitortls), 64, - (int *)(monitortls + 0x38)) != -1) { - return; - } - munmap(monitorstack, GetStackSize()); - } - free(monitortls); + if (_spawn(MemoryMonitor, 0, &monitorth) == -1) { + WARNF("(memv) failed to start memory monitor %m"); } - WARNF("(memv) failed to start memory monitor %m"); - monitortty = 0; } static int HandleConnection(size_t i) { @@ -7029,7 +7015,7 @@ static void ReplEventLoop(void) { polls[0].fd = -1; } -static int WindowsReplThread(void *arg) { +static int 
WindowsReplThread(void *arg, int tid) { int sig; lua_State *L = GL; DEBUGF("(repl) started windows thread"); @@ -7289,16 +7275,7 @@ void RedBean(int argc, char *argv[]) { if (daemonize || uniprocess || !linenoiseIsTerminal()) { EventLoop(HEARTBEAT); } else if (IsWindows()) { - CHECK_NE(MAP_FAILED, (repltls = malloc(64))); - CHECK_NE(MAP_FAILED, - (replstack = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0))); - CHECK_NE( - -1, - clone(WindowsReplThread, replstack, GetStackSize(), - CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID, - 0, 0, __initialize_tls(repltls), 64, (int *)(repltls + 0x38))); + CHECK_NE(-1, _spawn(WindowsReplThread, 0, &replth)); EventLoop(100); } else { ReplEventLoop(); @@ -7315,19 +7292,11 @@ void RedBean(int argc, char *argv[]) { } if (!isexitingworker) { if (!IsTiny()) { - if (monitortty) { - terminatemonitor = true; - _wait0((int *)(monitortls + 0x38)); - munmap(monitorstack, GetStackSize()); - free(monitortls); - } + terminatemonitor = true; + _join(&monitorth); } #ifndef STATIC - if (repltls) { - _wait0((int *)(repltls + 0x38)); - munmap(replstack, GetStackSize()); - free(repltls); - } + _join(&replth); #endif } if (!isexitingworker) { @@ -7349,11 +7318,9 @@ int main(int argc, char *argv[]) { return 0; CloseServerFds(); } - if (repltls) { - free(repltls); - linenoiseDisableRawMode(); - linenoiseHistoryFree(); - } + _join(&replth); + linenoiseDisableRawMode(); + linenoiseHistoryFree(); } CheckForMemoryLeaks(); } diff --git a/tool/viz/derasterize.c b/tool/viz/derasterize.c index 0b35ca2d4..3df24fb78 100644 --- a/tool/viz/derasterize.c +++ b/tool/viz/derasterize.c @@ -104,7 +104,7 @@ int y_; /* -y HEIGHT [in flexidecimal] */ #define Mode BEST #if Mode == BEST -#define MC 9u /* log2(#) of color combos to consider */ +#define MC 9u /* log2(#) of color combos to consider */ #define GN 35u /* # of glyphs to consider */ #elif Mode == 
FAST #define MC 6u @@ -114,10 +114,10 @@ int y_; /* -y HEIGHT [in flexidecimal] */ #define GN 25u #endif -#define CN 3u /* # channels (rgb) */ -#define YS 8u /* row stride -or- block height */ -#define XS 4u /* column stride -or- block width */ -#define GT 44u /* total glyphs */ +#define CN 3u /* # channels (rgb) */ +#define YS 8u /* row stride -or- block height */ +#define XS 4u /* column stride -or- block width */ +#define GT 44u /* total glyphs */ #define BN (YS * XS) /* # scalars in block/glyph plane */ #define PHIPRIME 0x9E3779B1u @@ -434,7 +434,7 @@ static void PrintImage(unsigned yn, unsigned xn, char *v, *vt; size = yn * (xn * (32 + (2 + (1 + 3) * 3) * 2 + 1 + 3)) * 1 + 5 + 1; size = ROUNDUP(size, FRAMESIZE); - CHECK_NE(MAP_FAILED, (vt = mapanon(size))); + CHECK_NOTNULL((vt = _mapanon(size))); v = RenderImage(vt, yn, xn, rgb); *v++ = '\r'; *v++ = 033; @@ -532,8 +532,8 @@ static void LoadFile(const char *path, size_t yn, size_t xn, void *rgb) { CHECK_EQ(CN, 3); data2size = ROUNDUP(sizeof(float) * goty * gotx * CN, FRAMESIZE); data3size = ROUNDUP(sizeof(float) * yn * YS * xn * XS * CN, FRAMESIZE); - CHECK_NE(MAP_FAILED, (data2 = mapanon(data2size))); - CHECK_NE(MAP_FAILED, (data3 = mapanon(data3size))); + CHECK_NOTNULL((data2 = _mapanon(data2size))); + CHECK_NOTNULL((data3 = _mapanon(data3size))); rgb2lin(goty * gotx * CN, data2, data); lanczos3(yn * YS, xn * XS, data3, goty, gotx, data2, gotx * 3); rgb2std(yn * YS * xn * XS * CN, rgb, data3); @@ -603,7 +603,7 @@ int main(int argc, char *argv[]) { // FIXME: on the conversion stage should do 2Y because of halfblocks // printf( "filename >%s<\tx >%d<\ty >%d<\n\n", filename, x_, y_); size = y_ * YS * x_ * XS * CN; - CHECK_NE(MAP_FAILED, (rgb = mapanon(ROUNDUP(size, FRAMESIZE)))); + CHECK_NOTNULL((rgb = _mapanon(ROUNDUP(size, FRAMESIZE)))); for (i = optind; i < argc; ++i) { if (!argv[i]) continue; if (m_) { diff --git a/tool/viz/lib/sobel.c b/tool/viz/lib/sobel.c index a7cdaeb5c..4a5c15451 100644 --- 
a/tool/viz/lib/sobel.c +++ b/tool/viz/lib/sobel.c @@ -37,7 +37,7 @@ forceinline void ConvolveGradient(unsigned yn, unsigned xn, size_t size; unsigned y, x, i, j, k; float py[4], px[4], (*tmp)[yn][xn][4]; - tmp = mapanon((size = ROUNDUP(sizeof(float) * 4 * xn * yn, FRAMESIZE))); + tmp = _mapanon((size = ROUNDUP(sizeof(float) * 4 * xn * yn, FRAMESIZE))); for (y = 0; y < yn - KW + 1; ++y) { for (x = 0; x < xn - KW + 1; ++x) { for (k = 0; k < 4; ++k) py[k] = 0; diff --git a/tool/viz/printvideo.c b/tool/viz/printvideo.c index 7c495cc67..f812dd03e 100644 --- a/tool/viz/printvideo.c +++ b/tool/viz/printvideo.c @@ -751,7 +751,7 @@ static void RasterIt(void) { static bool once; static void *buf; if (!once) { - buf = mapanon(ROUNDUP(fb0_.size, FRAMESIZE)); + buf = _mapanon(ROUNDUP(fb0_.size, FRAMESIZE)); once = true; } WriteToFrameBuffer(fb0_.vscreen.yres_virtual, fb0_.vscreen.xres_virtual, buf,