diff --git a/ape/ape.lds b/ape/ape.lds index ca1b0956e..1db67d0e9 100644 --- a/ape/ape.lds +++ b/ape/ape.lds @@ -484,6 +484,7 @@ PFSTUB4(ape_elf_shstrndx, 0); HIDDEN(_tdata_size = _tdata_end - _tdata_start); HIDDEN(_tls_size = _tbss_end - _tdata_start); +HIDDEN(_tls_content = (_tdata_end - _tdata_start) + (_tbss_end - _tbss_start)); HIDDEN(__privileged_addr = ROUNDDOWN(__privileged_start, PAGESIZE)); HIDDEN(__privileged_size = (ROUNDUP(__privileged_end, PAGESIZE) - diff --git a/examples/greenbean.c b/examples/greenbean.c index 23853e2ca..b6c1bb0e6 100644 --- a/examples/greenbean.c +++ b/examples/greenbean.c @@ -115,7 +115,7 @@ int Worker(void *id, int tid) { struct timeval timeo = {KEEPALIVE / 1000, KEEPALIVE % 1000}; struct sockaddr_in addr = {.sin_family = AF_INET, .sin_port = htons(PORT)}; - server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + server = socket(AF_INET, SOCK_STREAM, 0); if (server == -1) { kprintf("socket() failed %m\n" " try running: sudo prlimit --pid=$$ --nofile=%d\n", @@ -294,7 +294,7 @@ int main(int argc, char *argv[]) { } threads = argc > 1 ? atoi(argv[1]) : GetCpuCount(); - if ((1 <= threads && threads <= 100000)) { + if (!(1 <= threads && threads <= 100000)) { kprintf("error: invalid number of threads\n"); exit(1); } diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c index 4be5df7d0..c76844754 100644 --- a/libc/runtime/clone.c +++ b/libc/runtime/clone.c @@ -64,10 +64,7 @@ struct CloneArgs { uint32_t utid; int64_t tid64; }; - union { - char lock; - void *oldrsp; - }; + char lock; int *ptid; int *ctid; int *ztid; @@ -287,6 +284,15 @@ static int CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz, //////////////////////////////////////////////////////////////////////////////// // OPEN BESIYATA DISHMAYA +static void *oldrsp; + +__attribute__((__constructor__)) static void OpenbsdGetSafeRsp(void) { + // main thread stack should never be freed during process lifetime. we + // won't actually change this stack below. 
we just need a place + // where threads can park RSP for a few instructions while dying. + oldrsp = __builtin_frame_address(0); +} + static wontreturn void OpenbsdThreadMain(void *p) { struct CloneArgs *wt = p; *wt->ptid = wt->tid; @@ -303,7 +309,7 @@ static wontreturn void OpenbsdThreadMain(void *p) { "movl\t$0,(%%rdi)\n\t" // *wt->ztid = 0 "syscall" // __threxit() : "=m"(*wt->ztid) - : "a"(302), "m"(wt->oldrsp), "D"(wt->ztid) + : "a"(302), "m"(oldrsp), "D"(wt->ztid) : "rcx", "r11", "memory"); unreachable; } @@ -325,7 +331,6 @@ static int CloneOpenbsd(int (*func)(void *, int), char *stk, size_t stksz, wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; - wt->oldrsp = __builtin_frame_address(0); wt->arg = arg; wt->func = func; tf->tf_stack = (char *)wt - 8; @@ -591,13 +596,8 @@ int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid, int rc; struct CloneArgs *wt; - if ((flags & CLONE_SETTLS) && !__tls_enabled) { - __enable_tls(); - } - - if ((flags & CLONE_THREAD) && !__threaded) { - __enable_threads(); - } + if (flags & CLONE_SETTLS) __enable_tls(); + if (flags & CLONE_THREAD) __enable_threads(); if (!func) { rc = einval(); diff --git a/libc/runtime/cosmo.S b/libc/runtime/cosmo.S index adc19feaa..eba8547f3 100644 --- a/libc/runtime/cosmo.S +++ b/libc/runtime/cosmo.S @@ -76,6 +76,23 @@ cosmo: push %rbp ret .endfn cosmo,weak +#if !IsTiny() +// Enable TLS early if _Thread_local is used +// In MODE=tiny you may need to explicitly call __enable_tls() +// Otherwise this would bloat life.com from 16kb → 32kb D: + .init.start 304,_init_tls + mov $_tls_content,%eax + test %eax,%eax + jz 1f + push %rdi + push %rsi + call __enable_tls + pop %rsi + pop %rdi + jz 1f +1: .init.end 304,_init_tls +#endif + #if !IsTiny() // Creates deterministically addressed stack we can use // diff --git a/libc/thread/zombie.c 
b/libc/runtime/enable_threads.c similarity index 63% rename from libc/thread/zombie.c rename to libc/runtime/enable_threads.c index 8df6491fd..06c3310e3 100644 --- a/libc/thread/zombie.c +++ b/libc/runtime/enable_threads.c @@ -16,36 +16,42 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/atomic.h" -#include "libc/mem/mem.h" +#include "libc/assert.h" +#include "libc/calls/calls.h" +#include "libc/calls/strace.internal.h" +#include "libc/nexgen32e/threaded.h" #include "libc/runtime/runtime.h" -#include "libc/thread/thread.h" -static struct Zombie { - struct Zombie *next; - cthread_t td; -} * cthread_zombies; +extern int __threadcalls_end[]; +extern int __threadcalls_start[]; -void cthread_zombies_add(cthread_t td) { - struct Zombie *z; - if ((z = malloc(sizeof(struct Zombie)))) { - z->td = td; - z->next = atomic_load(&cthread_zombies); - for (;;) { - if (atomic_compare_exchange_weak(&cthread_zombies, &z->next, z)) { - break; - } - } - } -} - -void cthread_zombies_reap(void) { - struct Zombie *z; - // TODO(jart): Is this right? Update to not use malloc/free? - while ((z = atomic_load(&cthread_zombies)) && !atomic_load(&z->td->tid)) { - if (atomic_compare_exchange_weak(&cthread_zombies, &z, z->next)) { - munmap(z->td->alloc.bottom, z->td->alloc.top - z->td->alloc.bottom); - free(z); - } +privileged void __enable_threads(void) { + if (__threaded) return; + STRACE("__enable_threads()"); + __threaded = gettid(); + __morph_begin(); + /* + * _NOPL("__threadcalls", func) + * + * The big ugly macro above is used by Cosmopolitan Libc to ensure + * locking primitives (e.g. flockfile, funlockfile) have zero impact on + * performance and binary size when threads aren't actually in play. 
+ * + * we have this + * + * 0f 1f 05 b1 19 00 00 nopl func(%rip) + * + * we're going to turn it into this + * + * 67 67 e8 b1 19 00 00 addr32 addr32 call func + * + * This is cheap and fast because the big ugly macro stored in the + * binary the offsets of all the instructions we need to change. + */ + for (int *p = __threadcalls_start; p < __threadcalls_end; ++p) { + _base[*p + 0] = 0x67; + _base[*p + 1] = 0x67; + _base[*p + 2] = 0xe8; } + __morph_end(); } diff --git a/libc/runtime/threadmode.c b/libc/runtime/enable_tls.c similarity index 86% rename from libc/runtime/threadmode.c rename to libc/runtime/enable_tls.c index 58da18e11..028dd32f0 100644 --- a/libc/runtime/threadmode.c +++ b/libc/runtime/enable_tls.c @@ -16,17 +16,14 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/assert.h" -#include "libc/bits/bits.h" #include "libc/calls/calls.h" +#include "libc/calls/strace.internal.h" #include "libc/calls/syscall-sysv.internal.h" #include "libc/dce.h" #include "libc/errno.h" -#include "libc/intrin/kprintf.h" #include "libc/macros.internal.h" #include "libc/nexgen32e/threaded.h" #include "libc/nt/thread.h" -#include "libc/nt/thunk/msabi.h" #include "libc/runtime/internal.h" #include "libc/runtime/runtime.h" #include "libc/str/str.h" @@ -48,14 +45,14 @@ #define _TLDZ ((intptr_t)_tdata_size) #define _TIBZ sizeof(struct cthread_descriptor_t) -extern int __threadcalls_end[]; -extern int __threadcalls_start[]; -extern unsigned char __get_tls_nt_rax[]; __msabi extern typeof(TlsAlloc) *const __imp_TlsAlloc; +extern unsigned char __tls_mov_nt_rax[]; +extern unsigned char __tls_add_nt_rax[]; + privileged void __enable_tls(void) { - assert(!__threaded); - assert(!__tls_enabled); + if (__tls_enabled) return; + STRACE("__enable_tls()"); // allocate tls memory for main process // @@ -119,6 +116,7 @@ privileged void 
__enable_tls(void) { * -mno-tls-direct-seg-refs flag which generates code like this * * 64 48 8b 0R4 25 00 00 00 00 mov %fs:0,%R + * 64 48 03 0R4 25 00 00 00 00 add %fs:0,%R * * Which on Mac we can replace with this: * @@ -127,17 +125,22 @@ privileged void __enable_tls(void) { * Whereas on Windows we'll replace it with this: * * 0f 1f 40 00 fatnop4 - * e8 xx xx xx xx call __get_tls_nt_%R + * e8 xx xx xx xx call __tls_mov_nt_%R * * Since we have no idea where the TLS instructions exist in the * binary, we need to disassemble the whole program image. This'll * potentially take a few milliseconds for some larger programs. * + * We check `_tls_content` which is generated by the linker script + * since it lets us determine ahead of time if _Thread_local vars + * have actually been linked into this program. + * * TODO(jart): compute probability this is just overkill */ - if (IsWindows() || IsXnu()) { + if ((intptr_t)_tls_content && (IsWindows() || IsXnu())) { int n, reg, dis; unsigned char *p; + const unsigned char *impl; struct XedDecodedInst xedd; __morph_begin(); @@ -154,7 +157,8 @@ privileged void __enable_tls(void) { if (xedd.length == 9 && // 0144 == p[0] && // fs 0110 == p[1] && // rex.w (64-bit operand size) - 0213 == p[2] && // mov reg/mem → reg (word-sized) + (0213 == p[2] || // mov reg/mem → reg (word-sized) + 0003 == p[2]) && // add reg/mem → reg (word-sized) 0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg 0045 == p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32 0000 == p[5] && // displacement (von Neumann endian) @@ -175,8 +179,13 @@ privileged void __enable_tls(void) { // that's the same as the mov destination. When setting // function displacement, &CALL+5+DISP must equal &FUNC. 
else { + if (p[2] == 3) { + impl = __tls_add_nt_rax; + } else { + impl = __tls_mov_nt_rax; + } reg = (p[3] & 070) >> 3; - dis = (__get_tls_nt_rax + reg * 18) - (p + 9); + dis = (impl + reg * 18) - (p + 9); p[0] = 0017; // map1 p[1] = 0037; // nopl (onl if reg=0) p[2] = 0100; // mod/rm (%rax)+disp8 @@ -202,35 +211,6 @@ privileged void __enable_tls(void) { } // we are now allowed to use tls + // setting this variable __tls_enabled = true; } - -privileged void __enable_threads(void) { - assert(!__threaded); - __threaded = gettid(); - __morph_begin(); - /* - * _NOPL("__threadcalls", func) - * - * The big ugly macro above is used by Cosmopolitan Libc to unser - * locking primitive (e.g. flockfile, funlockfile) have zero impact on - * performance and binary size when threads aren't actually in play. - * - * we have this - * - * 0f 1f 05 b1 19 00 00 nopl func(%rip) - * - * we're going to turn it into this - * - * 67 67 e8 b1 19 00 00 addr32 addr32 call func - * - * This is cheap and fast because the big ugly macro stored in the - * binary the offsets of all the instructions we need to change. 
- */ - for (int *p = __threadcalls_start; p < __threadcalls_end; ++p) { - _base[*p + 0] = 0x67; - _base[*p + 1] = 0x67; - _base[*p + 2] = 0xe8; - } - __morph_end(); -} diff --git a/libc/runtime/internal.h b/libc/runtime/internal.h index 899019007..c0e20fc5c 100644 --- a/libc/runtime/internal.h +++ b/libc/runtime/internal.h @@ -24,6 +24,7 @@ extern unsigned char _tdata_size[]; extern unsigned char _tbss_start[]; extern unsigned char _tbss_end[]; extern unsigned char _tls_size[]; +extern unsigned char _tls_content[]; void _init(void) hidden; void __enable_tls(void) hidden; diff --git a/libc/runtime/morph.greg.c b/libc/runtime/morph.greg.c index e75acdf39..bda8dc526 100644 --- a/libc/runtime/morph.greg.c +++ b/libc/runtime/morph.greg.c @@ -19,6 +19,7 @@ #define ShouldUseMsabiAttribute() 1 #include "libc/bits/asmflag.h" #include "libc/calls/internal.h" +#include "libc/calls/strace.internal.h" #include "libc/calls/struct/sigset.h" #include "libc/dce.h" #include "libc/errno.h" @@ -58,6 +59,7 @@ static privileged void __morph_mprotect(void *addr, size_t size, int prot, */ privileged void __morph_begin(void) { sigset_t ss = {{-1, -1}}; + STRACE("__morph_begin()"); if (!IsWindows()) { sys_sigprocmask(SIG_BLOCK, &ss, &oldss); } @@ -74,4 +76,5 @@ privileged void __morph_end(void) { if (!IsWindows()) { sys_sigprocmask(SIG_SETMASK, &oldss, 0); } + STRACE("__morph_end()"); } diff --git a/libc/stubs/ld.S b/libc/stubs/ld.S index 1a870d9e3..a3570b917 100644 --- a/libc/stubs/ld.S +++ b/libc/stubs/ld.S @@ -51,6 +51,7 @@ _tbss_start = 0 _tbss_end = 0 _tls_size = 0 + _tls_content = 0 .globl _base .globl ape_xlm @@ -73,6 +74,7 @@ .globl _tbss_start .globl _tbss_end .globl _tls_size + .globl _tls_content .globl __data_start .globl __data_end .globl __bss_start @@ -99,6 +101,7 @@ .weak _tbss_start .weak _tbss_end .weak _tls_size + .weak _tls_content .weak __data_start .weak __data_end .weak __bss_start diff --git a/libc/sysv/tlspolyfill.S b/libc/sysv/tlspolyfill.S index 
f0c489e35..56d0b63df 100644 --- a/libc/sysv/tlspolyfill.S +++ b/libc/sysv/tlspolyfill.S @@ -25,66 +25,132 @@ // @note each function is exactly 18 bytes // @see __enable_threads() -__get_tls_nt_rax: +__tls_mov_nt_rax: push %rcx mov __tls_index(%rip),%ecx mov %gs:0x1480(,%rcx,8),%rax pop %rcx ret - .endfn __get_tls_nt_rax,globl,hidden + .endfn __tls_mov_nt_rax,globl,hidden -__get_tls_nt_rcx: +__tls_mov_nt_rcx: push %rax mov __tls_index(%rip),%eax mov %gs:0x1480(,%rax,8),%rcx pop %rax ret - .endfn __get_tls_nt_rcx + .endfn __tls_mov_nt_rcx -__get_tls_nt_rdx: +__tls_mov_nt_rdx: push %rax mov __tls_index(%rip),%eax mov %gs:0x1480(,%rax,8),%rdx pop %rax ret - .endfn __get_tls_nt_rdx + .endfn __tls_mov_nt_rdx -__get_tls_nt_rbx: +__tls_mov_nt_rbx: push %rax mov __tls_index(%rip),%eax mov %gs:0x1480(,%rax,8),%rbx pop %rax ret - .endfn __get_tls_nt_rbx + .endfn __tls_mov_nt_rbx -__get_tls_nt_rsp: +__tls_mov_nt_rsp: push %rax mov __tls_index(%rip),%eax mov %gs:0x1480(,%rax,8),%rsp pop %rax ret - .endfn __get_tls_nt_rsp + .endfn __tls_mov_nt_rsp -__get_tls_nt_rbp: +__tls_mov_nt_rbp: push %rax mov __tls_index(%rip),%eax mov %gs:0x1480(,%rax,8),%rbp pop %rax ret - .endfn __get_tls_nt_rbp + .endfn __tls_mov_nt_rbp -__get_tls_nt_rsi: +__tls_mov_nt_rsi: push %rax mov __tls_index(%rip),%eax mov %gs:0x1480(,%rax,8),%rsi pop %rax ret - .endfn __get_tls_nt_rsi + .endfn __tls_mov_nt_rsi -__get_tls_nt_rdi: +__tls_mov_nt_rdi: push %rax mov __tls_index(%rip),%eax mov %gs:0x1480(,%rax,8),%rdi pop %rax ret - .endfn __get_tls_nt_rdi + .endfn __tls_mov_nt_rdi + +//////////////////////////////////////////////////////////////////////////////// + +__tls_add_nt_rax: + push %rcx + mov __tls_index(%rip),%ecx + add %gs:0x1480(,%rcx,8),%rax + pop %rcx + ret + .endfn __tls_add_nt_rax,globl,hidden + +__tls_add_nt_rcx: + push %rax + mov __tls_index(%rip),%eax + add %gs:0x1480(,%rax,8),%rcx + pop %rax + ret + .endfn __tls_add_nt_rcx + +__tls_add_nt_rdx: + push %rax + mov __tls_index(%rip),%eax + add 
%gs:0x1480(,%rax,8),%rdx + pop %rax + ret + .endfn __tls_add_nt_rdx + +__tls_add_nt_rbx: + push %rax + mov __tls_index(%rip),%eax + add %gs:0x1480(,%rax,8),%rbx + pop %rax + ret + .endfn __tls_add_nt_rbx + +__tls_add_nt_rsp: + push %rax + mov __tls_index(%rip),%eax + add %gs:0x1480(,%rax,8),%rsp + pop %rax + ret + .endfn __tls_add_nt_rsp + +__tls_add_nt_rbp: + push %rax + mov __tls_index(%rip),%eax + add %gs:0x1480(,%rax,8),%rbp + pop %rax + ret + .endfn __tls_add_nt_rbp + +__tls_add_nt_rsi: + push %rax + mov __tls_index(%rip),%eax + add %gs:0x1480(,%rax,8),%rsi + pop %rax + ret + .endfn __tls_add_nt_rsi + +__tls_add_nt_rdi: + push %rax + mov __tls_index(%rip),%eax + add %gs:0x1480(,%rax,8),%rdi + pop %rax + ret + .endfn __tls_add_nt_rdi diff --git a/libc/thread/detach.c b/libc/thread/detach.c deleted file mode 100644 index 48f5abea4..000000000 --- a/libc/thread/detach.c +++ /dev/null @@ -1,58 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/bits/atomic.h" -#include "libc/calls/calls.h" -#include "libc/calls/strace.internal.h" -#include "libc/errno.h" -#include "libc/intrin/asan.internal.h" -#include "libc/str/str.h" -#include "libc/thread/thread.h" - -/** - * Detaches thread. - * - * Calling this function will cause the thread to free its own memory - * once it exits. Using this function is mutually exclusive from the - * chtread_join() API. - * - * @return 0 on success or errno number on failure - * @raises EINVAL if thread isn't joinable - * @raises ESRCH if no such thread exists - * @threadsafe - */ -int cthread_detach(cthread_t td) { - int rc, tid; - if (!td || (IsAsan() && !__asan_is_valid(td, sizeof(*td)))) { - rc = ESRCH; - tid = -1; - } else if ((tid = td->tid) == gettid()) { - rc = EDEADLK; - } else if (atomic_load(&td->state) & (cthread_detached | cthread_joining)) { - rc = EINVAL; - } else if (!atomic_fetch_add(&td->state, cthread_detached) & - cthread_finished) { - rc = 0; - } else if (!munmap(td->alloc.bottom, td->alloc.top - td->alloc.bottom)) { - rc = 0; - } else { - rc = errno; - } - STRACE("cthread_detached(%d) → %s", tid, !rc ? "0" : strerrno(rc)); - return rc; -} diff --git a/libc/thread/exit.c b/libc/thread/exit.c deleted file mode 100644 index 8a0b40f8a..000000000 --- a/libc/thread/exit.c +++ /dev/null @@ -1,33 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. 
│ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/calls/strace.internal.h" -#include "libc/runtime/runtime.h" -#include "libc/thread/thread.h" - -/** - * Exits cosmopolitan thread. - * - * @param exitcode is passed along to cthread_join() - * @threadsafe - * @noreturn - */ -wontreturn void cthread_exit(void *exitcode) { - STRACE("cthread_exit(%p)", exitcode); - longerjmp(cthread_self()->exiter, (intptr_t)exitcode); -} diff --git a/libc/thread/internal.h b/libc/thread/internal.h index 346c11399..331e7899b 100644 --- a/libc/thread/internal.h +++ b/libc/thread/internal.h @@ -4,8 +4,9 @@ #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ -extern uint64_t _pthread_key_usage[(PTHREAD_KEYS_MAX + 63) / 64]; -extern pthread_key_dtor _pthread_key_dtor[PTHREAD_KEYS_MAX]; +hidden extern uint64_t _pthread_key_usage[(PTHREAD_KEYS_MAX + 63) / 64]; +hidden extern pthread_key_dtor _pthread_key_dtor[PTHREAD_KEYS_MAX]; +hidden extern _Thread_local void *_pthread_keys[PTHREAD_KEYS_MAX]; void _pthread_key_destruct(void *[PTHREAD_KEYS_MAX]); diff --git a/libc/thread/key.c b/libc/thread/key.c index 954369123..63d917840 100644 --- a/libc/thread/key.c +++ b/libc/thread/key.c @@ -18,5 +18,11 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/thread/internal.h" +// tls value slots for pthread keys api +_Thread_local void *_pthread_keys[PTHREAD_KEYS_MAX]; + +// 
bitset of tls key registrations uint64_t _pthread_key_usage[(PTHREAD_KEYS_MAX + 63) / 64]; + +// pthread tls key destructors pthread_key_dtor _pthread_key_dtor[PTHREAD_KEYS_MAX]; diff --git a/libc/thread/pthread_getspecific.c b/libc/thread/pthread_getspecific.c index 4ee71c174..0aa0771af 100644 --- a/libc/thread/pthread_getspecific.c +++ b/libc/thread/pthread_getspecific.c @@ -18,16 +18,15 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/errno.h" #include "libc/nexgen32e/gettls.h" +#include "libc/thread/internal.h" #include "libc/thread/thread.h" -STATIC_YOINK("_main_thread_ctor"); - /** * Gets value of TLS slot for current thread. */ void *pthread_getspecific(pthread_key_t key) { - if (key < PTHREAD_KEYS_MAX) { - return ((cthread_t)__get_tls_inline())->key[key]; + if (0 <= key && key < PTHREAD_KEYS_MAX) { + return _pthread_keys[key]; } else { return 0; } diff --git a/libc/thread/pthread_key_create.c b/libc/thread/pthread_key_create.c index 88981bb97..04665450c 100644 --- a/libc/thread/pthread_key_create.c +++ b/libc/thread/pthread_key_create.c @@ -23,8 +23,6 @@ #include "libc/thread/internal.h" #include "libc/thread/thread.h" -STATIC_YOINK("_main_thread_ctor"); - /** * Allocates TLS slot. 
*/ @@ -43,7 +41,7 @@ int pthread_key_create(pthread_key_t *key, pthread_key_dtor dtor) { } static textexit void _pthread_key_atexit(void) { - _pthread_key_destruct(((cthread_t)__get_tls())->key); + _pthread_key_destruct(_pthread_keys); } __attribute__((__constructor__)) static textstartup void _pthread_key_init() { diff --git a/libc/thread/pthread_setspecific.c b/libc/thread/pthread_setspecific.c index da8a77409..8d5b699d8 100644 --- a/libc/thread/pthread_setspecific.c +++ b/libc/thread/pthread_setspecific.c @@ -18,16 +18,15 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/errno.h" #include "libc/nexgen32e/gettls.h" +#include "libc/thread/internal.h" #include "libc/thread/thread.h" -STATIC_YOINK("_main_thread_ctor"); - /** * Sets value of TLS slot for current thread. */ int pthread_setspecific(pthread_key_t key, void *val) { - if (key < PTHREAD_KEYS_MAX) { - ((cthread_t)__get_tls_inline())->key[key] = val; + if (0 <= key && key < PTHREAD_KEYS_MAX) { + _pthread_keys[key] = val; return 0; } else { return EINVAL; diff --git a/libc/thread/sem.c b/libc/thread/sem.c index 9c75c32c6..4187584ba 100644 --- a/libc/thread/sem.c +++ b/libc/thread/sem.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/bits/atomic.h" +#include "libc/calls/calls.h" #include "libc/thread/thread.h" STATIC_YOINK("_main_thread_ctor"); @@ -29,7 +30,7 @@ static void Pause(int attempt) { __builtin_ia32_pause(); } } else { - cthread_yield(); + sched_yield(); } } diff --git a/libc/thread/spawn.c b/libc/thread/spawn.c index a051b84fd..fab370ea6 100644 --- a/libc/thread/spawn.c +++ b/libc/thread/spawn.c @@ -35,7 +35,19 @@ STATIC_YOINK("_main_thread_ctor"); /** - * @fileoverview Simple System Threads API + * @fileoverview Simple threading API + * + * This API is supported on all six operating systems. 
We have this + * because the POSIX threads API is positively enormous. We currently + * only implement a small subset of POSIX threads, e.g. mutexes. So + * until we can implement all of POSIX threads, this API is great. If we + * consider that the classic forking concurrency library consists of a + * single function, it's a shame POSIX didn't define threads in the past + * to just be this, since create/join/atomics is really all we need. + * + * This spawn library abstracts clone() which also works on all + * platforms; however our implementation of clone() is significantly + * complicated so we strongly recommend always favoring this API. */ #define _TLSZ ((intptr_t)_tls_size) @@ -50,7 +62,7 @@ STATIC_YOINK("_main_thread_ctor"); * @param arg shall be passed to `fun` * @param opt_out_thread needn't be initialiized and is always clobbered * except when it isn't specified, in which case, the thread is kind - * of detached and will leak in stack / tls memory + * of detached and will (currently) just leak the stack / tls memory * @return 0 on success, or -1 w/ errno */ int _spawn(int fun(void *, int), void *arg, struct spawn *opt_out_thread) { diff --git a/libc/thread/thread.h b/libc/thread/thread.h index d8938c972..282cb0ef6 100644 --- a/libc/thread/thread.h +++ b/libc/thread/thread.h @@ -23,17 +23,11 @@ struct cthread_descriptor_t { int32_t __pad0; /* 0x10 */ int32_t state; /* 0x14 */ void *arg; /* 0x18 */ - void *pthread_ret_ptr; /* 0x20 */ - int64_t __pad1; /* 0x28 */ + int64_t __pad1; /* 0x20 */ + int64_t __pad2; /* 0x28 */ struct cthread_descriptor_t *self2; /* 0x30 */ int32_t tid; /* 0x38 */ int32_t err; /* 0x3c */ - void *exitcode; - struct { - char *top, *bottom; - } stack, alloc; - jmp_buf exiter; - void *key[PTHREAD_KEYS_MAX]; }; typedef struct cthread_descriptor_t *cthread_t; @@ -49,16 +43,7 @@ typedef struct cthread_attr_t { int mode; } cthread_attr_t; -extern const void *const _main_thread_ctor[]; - -int cthread_create(cthread_t *, const cthread_attr_t 
*, void *(*)(void *), - void *); - -int cthread_yield(void); cthread_t cthread_self(void); -int cthread_join(cthread_t, void **); -void cthread_exit(void *) wontreturn; -int cthread_detach(cthread_t); int cthread_attr_init(cthread_attr_t *); int cthread_attr_destroy(cthread_attr_t *); int cthread_attr_setstacksize(cthread_attr_t *, size_t); diff --git a/libc/thread/yield.c b/libc/thread/yield.c deleted file mode 100644 index c3336f850..000000000 --- a/libc/thread/yield.c +++ /dev/null @@ -1,27 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2020 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/calls/calls.h" -#include "libc/thread/thread.h" - -/** - * Asks operating system to handoff remaining time slice. 
- */ -int cthread_yield(void) { - return sched_yield(); -} diff --git a/test/libc/thread/dog.c b/test/libc/thread/dog.c deleted file mode 100644 index 184fb3c9b..000000000 --- a/test/libc/thread/dog.c +++ /dev/null @@ -1,30 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. │ -╚─────────────────────────────────────────────────────────────────────────────*/ - -_Thread_local char x; -static _Thread_local char y; - -char ha(void) { - ++y; - return x; -} - -char ya(void) { - return y; -} diff --git a/test/tool/net/lre_test.lua b/test/tool/net/lre_test.lua index 3369d76c3..f0c782016 100644 --- a/test/tool/net/lre_test.lua +++ b/test/tool/net/lre_test.lua @@ -42,6 +42,10 @@ p,e = re.compile("[{") assert(e:errno() == re.EBRACK) assert(e:doc() == "Missing ']'") +p,e = re.search("notfound", "fanatics have their dreams wherewith they weave") +assert(not p) +assert(e:errno() == re.NOMATCH) + ---------------------------------------------------------------------------------------------------- -- BENCHMARKS