From 45a7435788540d1c6c048225835c5eefaa7c1ef8 Mon Sep 17 00:00:00 2001 From: Florian Lemaitre Date: Tue, 26 Oct 2021 01:02:26 +0200 Subject: [PATCH] [WIP] Threading phase 2 (#301) * Exponential back-off * Removed "native" specifier * Abstract away Futex for cthread * Complete setup for TLS (including main thread) --- ape/ape.lds | 13 ++++ examples/thread.c | 25 ++++--- libc/runtime/cosmo.S | 1 + libc/runtime/cthread_init.c | 68 ++++++++++++++++++ libc/stubs/ld.S | 15 ++++ libc/thread/create.c | 14 +++- libc/thread/descriptor.h | 1 + libc/thread/{nativesem.c => sem.c} | 112 +++++++++++++---------------- libc/thread/{nativesem.h => sem.h} | 12 ++-- libc/thread/wait.c | 50 +++++++++++++ libc/thread/wait.h | 16 +++++ 11 files changed, 249 insertions(+), 78 deletions(-) create mode 100644 libc/runtime/cthread_init.c rename libc/thread/{nativesem.c => sem.c} (54%) rename libc/thread/{nativesem.h => sem.h} (56%) create mode 100644 libc/thread/wait.c create mode 100644 libc/thread/wait.h diff --git a/ape/ape.lds b/ape/ape.lds index b1ffd1383..ac41b1bc5 100644 --- a/ape/ape.lds +++ b/ape/ape.lds @@ -342,6 +342,19 @@ SECTIONS { /*END: Read Only Data (only needed for initialization) */ /*END: Read Only Data */ } :Rom + + .tdata . : { + _tdata_start = .; + *(SORT_BY_ALIGNMENT(.tdata)) + *(SORT_BY_ALIGNMENT(.tdata.*)) + _tdata_end = .; + } + .tbss . : { + _tbss_start = .; + *(SORT_BY_ALIGNMENT(.tbss)) + *(SORT_BY_ALIGNMENT(.tbss.*)) + _tbss_end = .; + } .data . 
: { /*BEGIN: Read/Write Data */ diff --git a/examples/thread.c b/examples/thread.c index 06ae57914..2f9c55523 100644 --- a/examples/thread.c +++ b/examples/thread.c @@ -7,35 +7,44 @@ │ • http://creativecommons.org/publicdomain/zero/1.0/ │ ╚─────────────────────────────────────────────────────────────────*/ #endif +#include "libc/calls/calls.h" +#include "libc/runtime/runtime.h" #include "libc/stdio/stdio.h" #include "libc/thread/create.h" #include "libc/thread/self.h" #include "libc/thread/detach.h" #include "libc/thread/join.h" -#include "libc/thread/nativesem.h" +#include "libc/thread/sem.h" #include "libc/time/time.h" -cthread_native_sem_t semaphore; +cthread_sem_t semaphore; + +__thread int test_tls = 0x12345678; int worker(void* arg) { - cthread_native_sem_signal(&semaphore); + void* p; + arch_prctl(ARCH_GET_FS, &p); + cthread_sem_signal(&semaphore); cthread_t self = cthread_self(); int tid = self->tid; sleep(1); //sleep(10000); - //printf("[%p] %d\n", self, tid); + printf("[%p] %d -> 0x%x\n", self, tid, test_tls); (void)arg; return 4; } int main() { - cthread_native_sem_init(&semaphore, 0); + cthread_t self = cthread_self(); + int tid = self->tid; + printf("[%p] %d -> 0x%x\n", self, tid, test_tls); + cthread_sem_init(&semaphore, 0); cthread_t thread; int rc = cthread_create(&thread, NULL, &worker, NULL); if (rc == 0) { - cthread_native_sem_wait(&semaphore, 0, 0, NULL); + cthread_sem_wait(&semaphore, 0, NULL); //printf("thread created: %p\n", thread); sleep(1); #if 1 @@ -44,8 +53,8 @@ int main() { rc = cthread_detach(thread); sleep(2); #endif - cthread_native_sem_signal(&semaphore); - cthread_native_sem_wait(&semaphore, 0, 0, NULL); + cthread_sem_signal(&semaphore); + cthread_sem_wait(&semaphore, 0, NULL); //printf("thread joined: %p -> %d\n", thread, rc); } else { printf("ERROR: thread could not be started: %d\n", rc); diff --git a/libc/runtime/cosmo.S b/libc/runtime/cosmo.S index 5760a93b0..a465e94a2 100644 --- a/libc/runtime/cosmo.S +++ 
b/libc/runtime/cosmo.S @@ -45,6 +45,7 @@ cosmo: push %rbp pop %rax #endif call _init + call _main_thread_init # FIXME: use .init.start macro ezlea __init_array_start,ax # static ctors in forward order .weak __init_array_start # could be called multiple times ezlea __init_array_end,cx # idempotency recommended diff --git a/libc/runtime/cthread_init.c b/libc/runtime/cthread_init.c new file mode 100644 index 000000000..20fbaa7bf --- /dev/null +++ b/libc/runtime/cthread_init.c @@ -0,0 +1,68 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/calls/calls.h"
+#include "libc/runtime/runtime.h"
+#include "libc/str/str.h"
+#include "libc/sysv/consts/map.h"
+#include "libc/sysv/consts/nr.h"
+#include "libc/sysv/consts/prot.h"
+#include "libc/thread/descriptor.h"
+
+// TLS boundaries: start/end of the linked .tdata and .tbss images
+extern char _tbss_start, _tbss_end, _tdata_start, _tdata_end;
+/** Maps the main thread's TLS block + descriptor, then points FS at it. */
+void _main_thread_init(void) {
+  size_t tbsssize = &_tbss_end - &_tbss_start;
+  size_t tdatasize = &_tdata_end - &_tdata_start;
+  size_t tlssize = tbsssize + tdatasize;
+  size_t totalsize = tlssize + sizeof(struct cthread_descriptor_t);
+  totalsize = (totalsize + PAGESIZE - 1) & -PAGESIZE;  // round up to whole pages
+  // anonymous private mapping: pages start zeroed, so .tbss needs no clearing
+  uintptr_t mem = (uintptr_t)mmap(NULL, totalsize, PROT_READ | PROT_WRITE,
+                                  MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  if (mem == -1) {  // mmap reports failure as (void*)-1
+    abort();
+  }
+
+  void* bottom = (void*)mem;
+  void* top = (void*)(mem + totalsize);
+  // descriptor fills the last bytes (assumes cthread_t points to the struct)
+  cthread_t td = (cthread_t)top - 1;
+  td->self = td;
+  td->stack.top = NULL;  // no stack range is recorded for the main thread
+  td->stack.bottom = NULL;
+  td->tls.top = top;
+  td->tls.bottom = bottom;
+  td->alloc.top = top;
+  td->alloc.bottom = bottom;
+  td->state = cthread_main;
+  // NOTE(review): tlssize ignores any padding between .tdata and .tbss - confirm
+  // Initialize TLS with content of .tdata section, tlssize bytes below td
+  memmove((void*)((uintptr_t)td - tlssize), &_tdata_start, tdatasize);
+
+  // Get TID of main thread (uses __NR_getpid when __NR_gettid is 0xfff)
+  int gettid = __NR_gettid;
+  if (gettid == 0xfff) gettid = __NR_getpid;
+  td->tid = syscall(gettid);
+
+  // Set FS base to the descriptor; abort if the kernel refuses
+  if (arch_prctl(ARCH_SET_FS, td) != 0) {
+    abort();
+  }
+}
diff --git a/libc/stubs/ld.S b/libc/stubs/ld.S
index 583768fa5..23d78e6db 100644
--- a/libc/stubs/ld.S
+++ b/libc/stubs/ld.S
@@ -36,6 +36,13 @@
 	__relo_start = 0
 	__relo_end = 0
 
+// Thread local boundaries defined by linker script
+// @see ape/ape.lds
+	_tbss_start = 0
+	_tbss_end = 0
+	_tdata_start = 0
+	_tdata_end = 0
+
 	.globl	_base
 	.globl	ape_xlm
 	.globl	__relo_start
@@ -48,6 +55,10 @@
 	.globl	_end
 	.globl	_ereal
 	.globl	_etext
+	.globl	_tbss_start
+	.globl	_tbss_end
+	.globl	_tdata_start
+	.globl	_tdata_end
 
 	.weak	
_base .weak ape_xlm @@ -61,3 +72,7 @@ .weak _end .weak _ereal .weak _etext + .weak _tbss_start + .weak _tbss_end + .weak _tdata_start + .weak _tdata_end diff --git a/libc/thread/create.c b/libc/thread/create.c index c5b5d0c42..45582ac4b 100644 --- a/libc/thread/create.c +++ b/libc/thread/create.c @@ -19,17 +19,25 @@ #include "libc/errno.h" #include "libc/linux/clone.h" #include "libc/runtime/runtime.h" +#include "libc/str/str.h" #include "libc/sysv/consts/clone.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/nr.h" #include "libc/sysv/consts/prot.h" #include "libc/thread/create.h" +// TLS boundaries +extern char _tbss_start, _tbss_end, _tdata_start, _tdata_end; + static cthread_t _thread_allocate(const cthread_attr_t* attr) { + //extern void _main_thread_init(void); + //void (*dummy)(void) = &_main_thread_init; + //asm(""::"r"(dummy)); size_t stacksize = attr->stacksize; size_t guardsize = attr->guardsize; - // FIXME: properly count TLS size - size_t tlssize = 0; + size_t tbsssize = &_tbss_end - &_tbss_start; + size_t tdatasize = &_tdata_end - &_tdata_start; + size_t tlssize = tbsssize + tdatasize; size_t totalsize = 3 * guardsize + stacksize + tlssize + sizeof(struct cthread_descriptor_t); @@ -64,6 +72,8 @@ static cthread_t _thread_allocate(const cthread_attr_t* attr) { td->alloc.bottom = alloc_bottom; td->state = (attr->mode & CTHREAD_CREATE_DETACHED) ? 
cthread_detached : cthread_started;
+  // Initialize TLS with content of .tdata section
+  memmove((void*)((uintptr_t)td - tlssize), &_tdata_start, tdatasize);
 
   return td;
 }
diff --git a/libc/thread/descriptor.h b/libc/thread/descriptor.h
index 587092d12..29e815563 100644
--- a/libc/thread/descriptor.h
+++ b/libc/thread/descriptor.h
@@ -12,6 +12,7 @@ enum cthread_state {
   cthread_joining = 1,
   cthread_finished = 2,
   cthread_detached = 4,
+  cthread_main = 127,
 };
 
 struct cthread_descriptor_t {
diff --git a/libc/thread/nativesem.c b/libc/thread/sem.c
similarity index 54%
rename from libc/thread/nativesem.c
rename to libc/thread/sem.c
index 2e7eccafb..5d005cdb8 100644
--- a/libc/thread/nativesem.c
+++ b/libc/thread/sem.c
@@ -17,23 +17,32 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/bits/atomic.h"
-#include "libc/sysv/consts/futex.h"
-#include "libc/sysv/consts/nr.h"
-#include "libc/thread/nativesem.h"
+#include "libc/thread/sem.h"
+#include "libc/thread/wait.h"
 #include "libc/thread/yield.h"
 
 #define CTHREAD_THREAD_VAL_BITS 32
 
-int cthread_native_sem_init(cthread_native_sem_t* sem, int count) {
+static void pause(int attempt) {  // one back-off step; cost doubles per attempt
+  if (attempt < 16) {
+    for (int i = 0; i < (1 << attempt); ++i) {  // 2^attempt PAUSE instructions
+      asm("pause");
+    }
+  } else {  // attempt >= 16: call cthread_yield() instead of spinning longer
+    cthread_yield();
+  }
+}
+
+int cthread_sem_init(cthread_sem_t* sem, int count) {
   sem->linux.count = count;
   return 0;
 }
-int cthread_native_sem_destroy(cthread_native_sem_t* sem) {
+int cthread_sem_destroy(cthread_sem_t* sem) {
   (void)sem;
   return 0;
 }
 
-int cthread_native_sem_signal(cthread_native_sem_t* sem) {
+int cthread_sem_signal(cthread_sem_t* sem) {
   uint64_t count;
   asm volatile("lock xadd\t%1, %0"
                : "+m"(sem->linux.count), "=r"(count)
@@ -41,21 +50,15 @@ int cthread_native_sem_signal(cthread_native_sem_t* sem) {
                : "cc");
 
   if ((count >> CTHREAD_THREAD_VAL_BITS)) {
-    int flags = FUTEX_WAKE;
-    // WARNING: an offset of 4 bytes would be required on little-endian archs
void* wait_address = &sem->linux.count; - asm volatile("syscall" - : /* no outputs */ - : "a"(__NR_futex), "D"(wait_address), "S"(flags), "d"(1) - : "rcx", "r11", "cc", "memory"); + cthread_memory_wake32(wait_address, 1); } return 0; } -int cthread_native_sem_wait_slow(cthread_native_sem_t* sem, - const struct timespec* timeout) { +int cthread_sem_wait_futex(cthread_sem_t* sem, const struct timespec* timeout) { uint64_t count; // record current thread as waiter @@ -66,67 +69,52 @@ int cthread_native_sem_wait_slow(cthread_native_sem_t* sem, for (;;) { // try to acquire the semaphore, as well as remove itself from waiters - if ((uint32_t)count > 0 && - atomic_compare_exchange_weak( - &sem->linux.count, count, - count - 1 - ((uint64_t)1 << CTHREAD_THREAD_VAL_BITS))) - break; - - int flags = FUTEX_WAIT; - register struct timespec* timeout_ asm("r10") = timeout; - + while ((uint32_t)count > 0) { + // without spin, we could miss a futex wake + if (atomic_compare_exchange_weak( + &sem->linux.count, count, + count - 1 - ((uint64_t)1 << CTHREAD_THREAD_VAL_BITS))) { + return 0; + } + } + // WARNING: an offset of 4 bytes would be required on little-endian archs void* wait_address = &sem->linux.count; - asm volatile("syscall" - : /* no outputs */ - : "a"(__NR_futex), "D"(wait_address), "S"(flags), "d"(count), - "r"(timeout_) - : "rcx", "r11", "cc", "memory"); + cthread_memory_wait32(wait_address, count, timeout); count = atomic_load(&sem->linux.count); } return 0; } -int cthread_native_sem_wait_spin_yield(cthread_native_sem_t* sem, - uint64_t count, int yield, - const struct timespec* timeout) { - // spin on yield - while (yield-- > 0) { - if ((count >> CTHREAD_THREAD_VAL_BITS) != 0) - break; // a thread is already waiting in queue - if ((uint32_t)count > 0 && - atomic_compare_exchange_weak(&sem->linux.count, count, count - 1)) - return 0; - cthread_yield(); - } - - return cthread_native_sem_wait_slow(sem, timeout); -} - -int cthread_native_sem_wait_spin(cthread_native_sem_t* 
sem, uint64_t count,
-                                 int spin, int yield,
-                                 const struct timespec* timeout) {
+int cthread_sem_wait_spin(cthread_sem_t* sem, uint64_t count, int spin,
+                          const struct timespec* timeout) {
-  // spin on pause
-  while (spin-- > 0) {
-    if ((count >> CTHREAD_THREAD_VAL_BITS) != 0) break;
-    if ((uint32_t)count > 0 &&
-        atomic_compare_exchange_weak(&sem->linux.count, count, count - 1))
-      return 0;
-    asm volatile("pause");
+  // try up to `spin` rounds, backing off exponentially between them (see pause)
+  for (int attempt = 0; attempt < spin; ++attempt) {
+    while ((uint32_t)count > 0) {
+      // spin is useful if multiple waiters can acquire the semaphore at the same time
+      if (atomic_compare_exchange_weak(&sem->linux.count, count, count - 1)) {
+        return 0;
+      }
+    }
+    pause(attempt);
+    // re-read the counter: without this every later round tests a stale zero
+    count = atomic_load(&sem->linux.count);
   }
-
-  return cthread_native_sem_wait_spin_yield(sem, count, yield, timeout);
+  // spin budget used up: register as a waiter and block on the futex
+  return cthread_sem_wait_futex(sem, timeout);
 }
 
-int cthread_native_sem_wait(cthread_native_sem_t* sem, int spin, int yield,
-                            const struct timespec* timeout) {
+int cthread_sem_wait(cthread_sem_t* sem, int spin,  // spin: back-off rounds
+                     const struct timespec* timeout) {  // passed to futex wait
   uint64_t count = atomic_load(&sem->linux.count);
 
   // uncontended
-  if ((count >> 32) == 0 && (uint32_t)count > 0 &&
-      atomic_compare_exchange_weak(&sem->linux.count, count, count - 1))
-    return 0;
-
-  return cthread_native_sem_wait_spin(sem, count, spin, yield, timeout);
+  while ((uint32_t)count > 0) {
+    // spin is useful if multiple waiters can acquire the semaphore at the same time
+    if (atomic_compare_exchange_weak(&sem->linux.count, count, count - 1)) {
+      return 0;  // took one unit without ever blocking
+    }
+  }
+
+  return cthread_sem_wait_spin(sem, count, spin, timeout);
 }
diff --git a/libc/thread/nativesem.h b/libc/thread/sem.h
similarity index 56%
rename from libc/thread/nativesem.h
rename to libc/thread/sem.h
index df1b8583c..1e4183f34 100644
--- a/libc/thread/nativesem.h
+++ b/libc/thread/sem.h
@@ -7,19 +7,19 @@ COSMOPOLITAN_C_START_
  * @fileoverview native semaphore for implementation details
  */
 
-typedef union cthread_native_sem_t {
+typedef union cthread_sem_t { struct { uint64_t count; } linux; -} cthread_native_sem_t; +} cthread_sem_t; struct timespec; -int cthread_native_sem_init(cthread_native_sem_t*, int); -int cthread_native_sem_destroy(cthread_native_sem_t*); +int cthread_sem_init(cthread_sem_t*, int); +int cthread_sem_destroy(cthread_sem_t*); -int cthread_native_sem_wait(cthread_native_sem_t*, int, int, const struct timespec*); -int cthread_native_sem_signal(cthread_native_sem_t*); +int cthread_sem_wait(cthread_sem_t*, int, const struct timespec*); +int cthread_sem_signal(cthread_sem_t*); COSMOPOLITAN_C_END_ diff --git a/libc/thread/wait.c b/libc/thread/wait.c new file mode 100644 index 000000000..c0ecb7427 --- /dev/null +++ b/libc/thread/wait.c @@ -0,0 +1,50 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2020 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/sysv/consts/futex.h"
+#include "libc/sysv/consts/nr.h"
+#include "libc/thread/wait.h"
+
+int cthread_memory_wait32(uint32_t* addr, uint32_t val,
+                          const struct timespec* timeout) {
+  if (__NR_futex != 0xfff) {  // 0xfff: presumably "no futex on this host" - confirm
+    int flags = FUTEX_WAIT;
+    int rc;
+    // fourth syscall argument travels in r10; keep the pointee const
+    register const struct timespec* timeout_ asm("r10") = timeout;
+    asm volatile("syscall"
+                 : "=a"(rc)
+                 : "0"(__NR_futex), "D"(addr), "S"(flags), "d"(val),
+                   "r"(timeout_)
+                 : "rcx", "r11", "cc", "memory");  // SYSCALL clobbers rcx/r11
+    return rc;
+  }
+  return -1;  // futex unavailable: the caller was not blocked
+}
+
+int cthread_memory_wake32(uint32_t* addr, int n) {
+  if (__NR_futex != 0xfff) {
+    int flags = FUTEX_WAKE;
+    int rc;
+    asm volatile("syscall"
+                 : "=a"(rc)
+                 : "0"(__NR_futex), "D"(addr), "S"(flags), "d"(n)
+                 : "rcx", "r11", "cc", "memory");
+    return rc;
+  }
+  return -1;  // futex unavailable: nobody was woken
+}
diff --git a/libc/thread/wait.h b/libc/thread/wait.h
new file mode 100644
index 000000000..21deaade9
--- /dev/null
+++ b/libc/thread/wait.h
@@ -0,0 +1,28 @@
+#ifndef COSMOPOLITAN_LIBC_THREAD_WAIT_H_
+#define COSMOPOLITAN_LIBC_THREAD_WAIT_H_
+#if !(__ASSEMBLER__ + __LINKER__ + 0)
+COSMOPOLITAN_C_START_
+
+/**
+ * @fileoverview wait on memory
+ */
+struct timespec;
+
+/**
+ * Sleeps on the 32-bit word at addr if it still holds val (FUTEX_WAIT).
+ *
+ * @param timeout is handed to the kernel unchanged; may be NULL
+ * @return raw futex syscall result, or -1 if __NR_futex is 0xfff
+ */
+int cthread_memory_wait32(uint32_t* addr, uint32_t val,
+                          const struct timespec* timeout);
+
+/**
+ * Wakes at most n threads sleeping on the word at addr (FUTEX_WAKE).
+ * @return raw futex syscall result, or -1 if __NR_futex is 0xfff
+ */
+int cthread_memory_wake32(uint32_t* addr, int n);
+
+COSMOPOLITAN_C_END_
+#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
+#endif /* COSMOPOLITAN_LIBC_THREAD_WAIT_H_ */