[WIP] Threading phase 2 (#301)

* Exponential back-off
* Removed "native" specifier
* Abstract away Futex for cthread
* Complete setup for TLS (including main thread)
This commit is contained in:
Florian Lemaitre 2021-10-26 01:02:26 +02:00 committed by GitHub
parent 660ff56d40
commit 45a7435788
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 249 additions and 78 deletions

View file

@ -342,6 +342,19 @@ SECTIONS {
/*END: Read Only Data (only needed for initialization) */
/*END: Read Only Data */
} :Rom
/* Initialized thread-local data, placed at the current location counter.
   _tdata_start and _tdata_end bracket the collected .tdata input sections
   (sorted by alignment); the C runtime declares them as extern char and
   uses them to measure the image and copy it into a thread's TLS area. */
.tdata . : {
_tdata_start = .;
*(SORT_BY_ALIGNMENT(.tdata))
*(SORT_BY_ALIGNMENT(.tdata.*))
_tdata_end = .;
}
/* Thread-local .tbss input sections, placed at the current location
   counter and sorted by alignment. _tbss_start and _tbss_end bracket them;
   the C runtime declares them as extern char and uses their difference
   when sizing a thread's TLS area. */
.tbss . : {
_tbss_start = .;
*(SORT_BY_ALIGNMENT(.tbss))
*(SORT_BY_ALIGNMENT(.tbss.*))
_tbss_end = .;
}
.data . : {
/*BEGIN: Read/Write Data */

View file

@ -7,35 +7,44 @@
http://creativecommons.org/publicdomain/zero/1.0/ │
*/
#endif
#include "libc/calls/calls.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/stdio.h"
#include "libc/thread/create.h"
#include "libc/thread/self.h"
#include "libc/thread/detach.h"
#include "libc/thread/join.h"
#include "libc/thread/nativesem.h"
#include "libc/thread/sem.h"
#include "libc/time/time.h"
cthread_native_sem_t semaphore;
cthread_sem_t semaphore;
__thread int test_tls = 0x12345678;
int worker(void* arg) {
cthread_native_sem_signal(&semaphore);
void* p;
arch_prctl(ARCH_GET_FS, &p);
cthread_sem_signal(&semaphore);
cthread_t self = cthread_self();
int tid = self->tid;
sleep(1);
//sleep(10000);
//printf("[%p] %d\n", self, tid);
printf("[%p] %d -> 0x%x\n", self, tid, test_tls);
(void)arg;
return 4;
}
int main() {
cthread_native_sem_init(&semaphore, 0);
cthread_t self = cthread_self();
int tid = self->tid;
printf("[%p] %d -> 0x%x\n", self, tid, test_tls);
cthread_sem_init(&semaphore, 0);
cthread_t thread;
int rc = cthread_create(&thread, NULL, &worker, NULL);
if (rc == 0) {
cthread_native_sem_wait(&semaphore, 0, 0, NULL);
cthread_sem_wait(&semaphore, 0, NULL);
//printf("thread created: %p\n", thread);
sleep(1);
#if 1
@ -44,8 +53,8 @@ int main() {
rc = cthread_detach(thread);
sleep(2);
#endif
cthread_native_sem_signal(&semaphore);
cthread_native_sem_wait(&semaphore, 0, 0, NULL);
cthread_sem_signal(&semaphore);
cthread_sem_wait(&semaphore, 0, NULL);
//printf("thread joined: %p -> %d\n", thread, rc);
} else {
printf("ERROR: thread could not be started: %d\n", rc);

View file

@ -45,6 +45,7 @@ cosmo: push %rbp
pop %rax
#endif
call _init
call _main_thread_init # FIXME: use .init.start macro
ezlea __init_array_start,ax # static ctors in forward order
.weak __init_array_start # could be called multiple times
ezlea __init_array_end,cx # idempotency recommended

View file

@ -0,0 +1,68 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/nr.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/descriptor.h"
// TLS boundaries
extern char _tbss_start, _tbss_end, _tdata_start, _tdata_end;
void _main_thread_init(void) {
size_t tbsssize = &_tbss_end - &_tbss_start;
size_t tdatasize = &_tdata_end - &_tdata_start;
size_t tlssize = tbsssize + tdatasize;
size_t totalsize = tlssize + sizeof(struct cthread_descriptor_t);
totalsize = (totalsize + PAGESIZE - 1) & -PAGESIZE;
uintptr_t mem = (uintptr_t)mmap(NULL, totalsize, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (mem == -1) {
abort();
}
void* bottom = (void*)mem;
void* top = (void*)(mem + totalsize);
cthread_t td = (cthread_t)top - 1;
td->self = td;
td->stack.top = NULL;
td->stack.bottom = NULL;
td->tls.top = top;
td->tls.bottom = bottom;
td->alloc.top = top;
td->alloc.bottom = bottom;
td->state = cthread_main;
// Initialize TLS with content of .tdata section
memmove((void*)((uintptr_t)td - tlssize), &_tdata_start, tdatasize);
// Get TID of main thread
int gettid = __NR_gettid;
if (gettid == 0xfff) gettid = __NR_getpid;
td->tid = syscall(gettid);
// Set FS
if (arch_prctl(ARCH_SET_FS, td) != 0) {
abort();
}
}

View file

@ -36,6 +36,13 @@
__relo_start = 0
__relo_end = 0
// Thread local boundaries defined by linker script
// @see ape/ape.lds
_tbss_start = 0
_tbss_end = 0
_tdata_start = 0
_tdata_end = 0
.globl _base
.globl ape_xlm
.globl __relo_start
@ -48,6 +55,10 @@
.globl _end
.globl _ereal
.globl _etext
.globl _tbss_start
.globl _tbss_end
.globl _tdata_start
.globl _tdata_end
.weak _base
.weak ape_xlm
@ -61,3 +72,7 @@
.weak _end
.weak _ereal
.weak _etext
.weak _tbss_start
.weak _tbss_end
.weak _tdata_start
.weak _tdata_end

View file

@ -19,17 +19,25 @@
#include "libc/errno.h"
#include "libc/linux/clone.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clone.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/nr.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/create.h"
// TLS boundaries
extern char _tbss_start, _tbss_end, _tdata_start, _tdata_end;
static cthread_t _thread_allocate(const cthread_attr_t* attr) {
//extern void _main_thread_init(void);
//void (*dummy)(void) = &_main_thread_init;
//asm(""::"r"(dummy));
size_t stacksize = attr->stacksize;
size_t guardsize = attr->guardsize;
// FIXME: properly count TLS size
size_t tlssize = 0;
size_t tbsssize = &_tbss_end - &_tbss_start;
size_t tdatasize = &_tdata_end - &_tdata_start;
size_t tlssize = tbsssize + tdatasize;
size_t totalsize =
3 * guardsize + stacksize + tlssize + sizeof(struct cthread_descriptor_t);
@ -64,6 +72,8 @@ static cthread_t _thread_allocate(const cthread_attr_t* attr) {
td->alloc.bottom = alloc_bottom;
td->state = (attr->mode & CTHREAD_CREATE_DETACHED) ? cthread_detached
: cthread_started;
// Initialize TLS with content of .tdata section
memmove((void*)((uintptr_t)td - tlssize), &_tdata_start, tdatasize);
return td;
}

View file

@ -12,6 +12,7 @@ enum cthread_state {
cthread_joining = 1,
cthread_finished = 2,
cthread_detached = 4,
cthread_main = 127,
};
struct cthread_descriptor_t {

View file

@ -17,23 +17,32 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/atomic.h"
#include "libc/sysv/consts/futex.h"
#include "libc/sysv/consts/nr.h"
#include "libc/thread/nativesem.h"
#include "libc/thread/sem.h"
#include "libc/thread/wait.h"
#include "libc/thread/yield.h"
#define CTHREAD_THREAD_VAL_BITS 32
int cthread_native_sem_init(cthread_native_sem_t* sem, int count) {
/**
 * Exponential back-off step for spin loops.
 *
 * For attempts below 16 this executes 2^attempt `pause` instructions;
 * from the 16th attempt on it calls cthread_yield() instead.
 *
 * @param attempt zero-based retry counter of the calling spin loop
 */
static void pause(int attempt) {
  if (attempt >= 16) {
    cthread_yield();
    return;
  }
  int remaining = 1 << attempt;
  while (remaining-- > 0) {
    asm("pause");
  }
}
/**
 * Initializes a semaphore with the given starting count.
 *
 * The count is stored into the semaphore's 64-bit word. The signal and
 * wait routines in this file treat the low 32 bits of that word as the
 * semaphore value and the bits above as the number of waiters.
 *
 * @param sem semaphore to initialize
 * @param count initial value
 * @return always 0
 */
int cthread_sem_init(cthread_sem_t* sem, int count) {
  uint64_t initial = count;
  sem->linux.count = initial;
  return 0;
}
int cthread_native_sem_destroy(cthread_native_sem_t* sem) {
/**
 * Destroys a semaphore.
 *
 * This implementation does nothing with the semaphore.
 *
 * @param sem semaphore to destroy (unused)
 * @return always 0
 */
int cthread_sem_destroy(cthread_sem_t* sem) {
// silence the unused-parameter warning
(void)sem;
return 0;
}
int cthread_native_sem_signal(cthread_native_sem_t* sem) {
int cthread_sem_signal(cthread_sem_t* sem) {
uint64_t count;
asm volatile("lock xadd\t%1, %0"
: "+m"(sem->linux.count), "=r"(count)
@ -41,21 +50,15 @@ int cthread_native_sem_signal(cthread_native_sem_t* sem) {
: "cc");
if ((count >> CTHREAD_THREAD_VAL_BITS)) {
int flags = FUTEX_WAKE;
// WARNING: an offset of 4 bytes would be required on big-endian archs
void* wait_address = &sem->linux.count;
asm volatile("syscall"
: /* no outputs */
: "a"(__NR_futex), "D"(wait_address), "S"(flags), "d"(1)
: "rcx", "r11", "cc", "memory");
cthread_memory_wake32(wait_address, 1);
}
return 0;
}
int cthread_native_sem_wait_slow(cthread_native_sem_t* sem,
const struct timespec* timeout) {
int cthread_sem_wait_futex(cthread_sem_t* sem, const struct timespec* timeout) {
uint64_t count;
// record current thread as waiter
@ -66,67 +69,52 @@ int cthread_native_sem_wait_slow(cthread_native_sem_t* sem,
for (;;) {
// try to acquire the semaphore, as well as remove itself from waiters
if ((uint32_t)count > 0 &&
atomic_compare_exchange_weak(
&sem->linux.count, count,
count - 1 - ((uint64_t)1 << CTHREAD_THREAD_VAL_BITS)))
break;
int flags = FUTEX_WAIT;
register struct timespec* timeout_ asm("r10") = timeout;
while ((uint32_t)count > 0) {
// without spin, we could miss a futex wake
if (atomic_compare_exchange_weak(
&sem->linux.count, count,
count - 1 - ((uint64_t)1 << CTHREAD_THREAD_VAL_BITS))) {
return 0;
}
}
// WARNING: an offset of 4 bytes would be required on big-endian archs
void* wait_address = &sem->linux.count;
asm volatile("syscall"
: /* no outputs */
: "a"(__NR_futex), "D"(wait_address), "S"(flags), "d"(count),
"r"(timeout_)
: "rcx", "r11", "cc", "memory");
cthread_memory_wait32(wait_address, count, timeout);
count = atomic_load(&sem->linux.count);
}
return 0;
}
int cthread_native_sem_wait_spin_yield(cthread_native_sem_t* sem,
uint64_t count, int yield,
const struct timespec* timeout) {
// spin on yield
while (yield-- > 0) {
if ((count >> CTHREAD_THREAD_VAL_BITS) != 0)
break; // a thread is already waiting in queue
if ((uint32_t)count > 0 &&
atomic_compare_exchange_weak(&sem->linux.count, count, count - 1))
return 0;
cthread_yield();
}
return cthread_native_sem_wait_slow(sem, timeout);
}
int cthread_native_sem_wait_spin(cthread_native_sem_t* sem, uint64_t count,
int spin, int yield,
const struct timespec* timeout) {
int cthread_sem_wait_spin(cthread_sem_t* sem, uint64_t count, int spin,
const struct timespec* timeout) {
// spin on pause
while (spin-- > 0) {
if ((count >> CTHREAD_THREAD_VAL_BITS) != 0) break;
if ((uint32_t)count > 0 &&
atomic_compare_exchange_weak(&sem->linux.count, count, count - 1))
return 0;
asm volatile("pause");
for (int attempt = 0; attempt < spin; ++attempt) {
//if ((count >> CTHREAD_THREAD_VAL_BITS) != 0) break;
while ((uint32_t)count > 0) {
// spin is useful if multiple waiters can acquire the semaphore at the same time
if (atomic_compare_exchange_weak(&sem->linux.count, count, count - 1)) {
return 0;
}
}
pause(attempt);
}
return cthread_native_sem_wait_spin_yield(sem, count, yield, timeout);
return cthread_sem_wait_futex(sem, timeout);
}
int cthread_native_sem_wait(cthread_native_sem_t* sem, int spin, int yield,
const struct timespec* timeout) {
int cthread_sem_wait(cthread_sem_t* sem, int spin,
const struct timespec* timeout) {
uint64_t count = atomic_load(&sem->linux.count);
// uncontended
if ((count >> 32) == 0 && (uint32_t)count > 0 &&
atomic_compare_exchange_weak(&sem->linux.count, count, count - 1))
return 0;
return cthread_native_sem_wait_spin(sem, count, spin, yield, timeout);
while ((uint32_t)count > 0) {
// spin is useful if multiple waiters can acquire the semaphore at the same time
if (atomic_compare_exchange_weak(&sem->linux.count, count, count - 1)) {
return 0;
}
}
return cthread_sem_wait_spin(sem, count, spin, timeout);
}

View file

@ -7,19 +7,19 @@ COSMOPOLITAN_C_START_
* @fileoverview native semaphore for implementation details
*/
typedef union cthread_native_sem_t {
typedef union cthread_sem_t {
struct {
uint64_t count;
} linux;
} cthread_native_sem_t;
} cthread_sem_t;
struct timespec;
int cthread_native_sem_init(cthread_native_sem_t*, int);
int cthread_native_sem_destroy(cthread_native_sem_t*);
int cthread_sem_init(cthread_sem_t*, int);
int cthread_sem_destroy(cthread_sem_t*);
int cthread_native_sem_wait(cthread_native_sem_t*, int, int, const struct timespec*);
int cthread_native_sem_signal(cthread_native_sem_t*);
int cthread_sem_wait(cthread_sem_t*, int, const struct timespec*);
int cthread_sem_signal(cthread_sem_t*);
COSMOPOLITAN_C_END_

50
libc/thread/wait.c Normal file
View file

@ -0,0 +1,50 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/sysv/consts/futex.h"
#include "libc/sysv/consts/nr.h"
#include "libc/thread/wait.h"
/**
 * Waits on a 32-bit memory word using the futex FUTEX_WAIT operation.
 *
 * The system call is issued directly with `addr`, FUTEX_WAIT, `val` and
 * `timeout` as its first four arguments.
 *
 * @param addr address of the word to wait on
 * @param val value handed to the kernel as the expected content of *addr
 * @param timeout forwarded to the kernel unchanged; callers in this
 *     library pass NULL for no timeout
 * @return the value the syscall leaves in rax, truncated to int, or -1
 *     when __NR_futex is 0xfff
 */
int cthread_memory_wait32(uint32_t* addr, uint32_t val,
                          const struct timespec* timeout) {
  if (__NR_futex != 0xfff) {
    int flags = FUTEX_WAIT;
    int rc;
    // the fourth syscall argument travels in r10; the pointee stays const
    // so the qualifier of `timeout` is not discarded
    register const struct timespec* timeout_ asm("r10") = timeout;
    asm volatile("syscall"
                 : "=a"(rc)
                 : "0"(__NR_futex), "D"(addr), "S"(flags), "d"(val),
                   "r"(timeout_)
                 : "rcx", "r11", "cc", "memory");
    return rc;
  }
  return -1;
}
/**
 * Wakes threads waiting on a 32-bit memory word using FUTEX_WAKE.
 *
 * The system call is issued directly with `addr`, FUTEX_WAKE and `n` as
 * its first three arguments.
 *
 * @param addr address of the word waiters are blocked on
 * @param n count passed to the kernel as the third syscall argument
 * @return the value the syscall leaves in rax, truncated to int, or -1
 *     when __NR_futex is 0xfff
 */
int cthread_memory_wake32(uint32_t* addr, int n) {
  // NOTE(review): 0xfff looks like the marker for a syscall number that
  // is unavailable on the host; confirm against libc/sysv/consts/nr.h
  if (__NR_futex == 0xfff) {
    return -1;
  }
  int op = FUTEX_WAKE;
  int result;
  asm volatile("syscall"
               : "=a"(result)
               : "0"(__NR_futex), "D"(addr), "S"(op), "d"(n)
               : "rcx", "r11", "cc", "memory");
  return result;
}

16
libc/thread/wait.h Normal file
View file

@ -0,0 +1,16 @@
#ifndef COSMOPOLITAN_LIBC_THREAD_WAIT_H_
#define COSMOPOLITAN_LIBC_THREAD_WAIT_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
/**
* @fileoverview wait on memory
*/
/* forward declaration; only pointers to it are used here */
struct timespec;
/**
 * Waits on the 32-bit word at the given address with FUTEX_WAIT, passing
 * the expected value and an optional timeout to the kernel. Returns the
 * syscall result, or -1 when the futex syscall number is 0xfff.
 */
int cthread_memory_wait32(uint32_t*, uint32_t, const struct timespec*);
/**
 * Issues FUTEX_WAKE on the 32-bit word at the given address with the
 * given count. Returns the syscall result, or -1 when the futex syscall
 * number is 0xfff.
 */
int cthread_memory_wake32(uint32_t*, int);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_THREAD_WAIT_H_ */