Make _Thread_local work across platforms

We now rewrite the binary image at runtime on Windows and XNU to change
mov %fs:0,%reg instructions to use %gs instead. This change also
introduces a simpler threading API, _spawn() and _join(), which has
replaced most clone() usage.
This commit is contained in:
Justine Tunney 2022-07-10 04:01:17 -07:00
parent e4d6e263d4
commit 5f4f6b0e69
51 changed files with 808 additions and 1043 deletions

View file

@ -1,133 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/atomic.h"
#include "libc/calls/calls.h"
#include "libc/calls/strace.internal.h"
#include "libc/errno.h"
#include "libc/intrin/setjmp.internal.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clone.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/internal.h"
#include "libc/thread/thread.h"
STATIC_YOINK("_main_thread_ctor");
static cthread_t cthread_allocate(const cthread_attr_t *attr) {
char *mem;
size_t size;
cthread_t td;
size = ROUNDUP(
attr->stacksize +
ROUNDUP((uintptr_t)_tls_size + sizeof(struct cthread_descriptor_t),
PAGESIZE),
FRAMESIZE);
mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_STACK | MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) return 0;
if (attr->guardsize > PAGESIZE) {
mprotect(mem, attr->guardsize, PROT_NONE);
}
td = (cthread_t)(mem + size - sizeof(struct cthread_descriptor_t));
td->self = td;
td->self2 = td;
td->err = errno;
td->tid = -1;
td->stack.bottom = mem;
td->stack.top = mem + attr->stacksize;
td->alloc.bottom = mem;
td->alloc.top = mem + size;
if (attr->mode & CTHREAD_CREATE_DETACHED) {
td->state = cthread_detached;
} else {
td->state = cthread_started;
}
// Initialize TLS with content of .tdata section
memmove((void *)((intptr_t)td - (intptr_t)_tls_size), _tdata_start,
(intptr_t)_tdata_size);
return td;
}
// Entry point handed to clone() by cthread_create(); `arg` is the new
// thread's descriptor. Runs the user callback, records its result in
// the descriptor, and marks the thread finished. Always returns 0.
static int cthread_start(void *arg) {
axdx_t rc;
void *exitcode;
cthread_t td = arg;
// when `ax` is zero the user callback is run and its return value is
// the exit code; otherwise `dx` is taken as the exit code
// NOTE(review): presumably the nonzero path is reached by a
// longjmp-style jump back to td->exiter (e.g. an explicit thread exit
// call) — confirm against the exit implementation
if (!(rc = setlongerjmp(td->exiter)).ax) {
exitcode = td->func(td->arg);
} else {
exitcode = (void *)rc.dx;
}
td->exitcode = exitcode;
// NOTE(review): presumably runs destructors for thread-specific keys —
// confirm against _pthread_key_destruct()
_pthread_key_destruct(td->key);
if (atomic_load(&td->state) & cthread_detached) {
// we're still using the stack
// thus we can't munmap it yet
// kick the can down the road!
cthread_zombies_add(td);
}
// publish completion last; cthread_join() tests the cthread_finished
// bit of `state`
atomic_fetch_add(&td->state, cthread_finished);
return 0;
}
/**
 * Creates thread.
 *
 * Memory for the stack, TLS, and descriptor is obtained in one mapping
 * and the thread is then started with clone(). If clone() fails, the
 * mapping is released again before returning.
 *
 * @param ptd receives pointer to new thread descriptor on success
 * @param attr holds special configuration, or null for the defaults
 * @param func is thread callback function
 * @param arg is argument supplied to `func`
 * @return 0 on success, or error number on failure
 * @threadsafe
 */
int cthread_create(cthread_t *ptd, const cthread_attr_t *attr,
                   void *(*func)(void *), void *arg) {
  int err;
  int child = -1;
  cthread_t thread;
  cthread_attr_t fallback;

  __threaded = true;
  cthread_zombies_reap();
  cthread_attr_init(&fallback);

  thread = cthread_allocate(attr ? attr : &fallback);
  if (!thread) {
    err = errno;
  } else {
    thread->func = func;
    thread->arg = arg;
    cthread_attr_destroy(&fallback);
    // the descriptor doubles as the callback argument, the TLS block,
    // and the home of the child tid word
    child = clone(cthread_start, thread->stack.bottom,
                  thread->stack.top - thread->stack.bottom,
                  CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
                      CLONE_SIGHAND | CLONE_SETTLS | CLONE_CHILD_SETTID |
                      CLONE_CHILD_CLEARTID,
                  thread, 0, thread, sizeof(struct cthread_descriptor_t),
                  &thread->tid);
    if (child == -1) {
      // capture errno before munmap() gets a chance to change it
      err = errno;
      munmap(thread->alloc.bottom, thread->alloc.top - thread->alloc.bottom);
    } else {
      *ptd = thread;
      err = 0;
    }
  }

  STRACE("cthread_create([%d], %p, %p, %p) → %s", child, attr, func, arg,
         err ? strerrno(err) : "0");
  return err;
}

View file

@ -21,7 +21,7 @@
// Init fragment named _main_thread_ctor.
// NOTE(review): this diff hunk lists two consecutive calls; presumably
// one is the line being replaced and the other its replacement —
// confirm against the checked-in file.
.init.start 400,_main_thread_ctor
// %rdi and %rsi are saved here and restored after the call
push %rdi
push %rsi
call _main_thread_init
call __enable_tls
pop %rsi
pop %rdi
.init.end 400,_main_thread_ctor

View file

@ -1,72 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/atomic.h"
#include "libc/calls/calls.h"
#include "libc/calls/strace.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/asan.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/futex.h"
#include "libc/sysv/consts/nr.h"
#include "libc/thread/thread.h"
/**
 * Waits for thread to terminate and frees its memory.
 *
 * On the success path the thread's whole allocation (`td->alloc`) is
 * unmapped, so `td` must not be used again afterwards.
 *
 * @param td is thread descriptor memory
 * @param exitcode optionally receives value returned by thread
 * @return 0 on success, or error number on failure
 * @raises EDEADLK when trying to join this thread
 * @raises EINVAL if another thread is joining
 * @raises ESRCH if `td` is null (or, under ASAN, not valid memory)
 * @raises EINVAL if not joinable (i.e. thread is detached)
 * @threadsafe
 */
int cthread_join(cthread_t td, void **exitcode) {
int x, rc, tid;
// `tid` is read from the descriptor before `state` is modified below;
// otherwise, tid could be set to 0 even though `state` is not finished
if (!td || (IsAsan() && !__asan_is_valid(td, sizeof(*td)))) {
rc = ESRCH;
tid = -1;
} else if ((tid = td->tid) == gettid()) { // tid must load before lock xadd
rc = EDEADLK;
} else if (atomic_load(&td->state) & (cthread_detached | cthread_joining)) {
rc = EINVAL;
} else {
// mark thread as joining; the value returned is the state from before
// the add, so waiting only happens if the thread hadn't finished yet
// NOTE(review): the check above and this add are separate atomic
// operations — confirm two concurrent joiners can't both get here
if (~atomic_fetch_add(&td->state, cthread_joining) & cthread_finished) {
// block until the tid word reads as zero
// NOTE(review): presumably the kernel zeroes it on thread exit (the
// thread is cloned with CLONE_CHILD_CLEARTID on &td->tid) and
// cthread_memory_wait32() is a futex-style wait — confirm
while ((x = atomic_load(&td->tid))) {
cthread_memory_wait32(&td->tid, x, 0);
}
}
if (exitcode) {
*exitcode = td->exitcode;
}
// release the thread's entire allocation, which contains `td` itself
if (!munmap(td->alloc.bottom, td->alloc.top - td->alloc.bottom)) {
rc = 0;
} else {
rc = errno;
}
}
STRACE("cthread_join(%d, [%p]) → %s", tid, !rc && exitcode ? *exitcode : 0,
!rc ? "0" : strerrno(rc));
return rc;
}

View file

@ -1,7 +1,7 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2020 Justine Alexandra Roberts Tunney
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
@ -16,53 +16,41 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/errno.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/mem/mem.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/spawn.h"
#include "libc/thread/thread.h"
textstartup void _main_thread_init(void) {
_Static_assert(offsetof(struct cthread_descriptor_t, self) == 0x00, "");
_Static_assert(offsetof(struct cthread_descriptor_t, self2) == 0x30, "");
_Static_assert(offsetof(struct cthread_descriptor_t, tid) == 0x38, "");
_Static_assert(offsetof(struct cthread_descriptor_t, err) == 0x3c, "");
cthread_t td;
size_t totalsize;
char *mem, *bottom, *top;
#define _TLSZ ((intptr_t)_tls_size)
#define _TLDZ ((intptr_t)_tdata_size)
#define _TIBZ sizeof(struct cthread_descriptor_t)
#define _MEMZ ROUNDUP(_TLSZ + _TIBZ, alignof(struct cthread_descriptor_t))
totalsize = ROUNDUP(
(uintptr_t)_tls_size + sizeof(struct cthread_descriptor_t), FRAMESIZE);
/**
* Allocates thread-local storage memory for new thread.
* @return buffer that must be released with free()
*/
char *_mktls(char **out_tib) {
char *tls;
cthread_t tib;
mem = mmap(0, totalsize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE,
-1, 0);
assert(mem != MAP_FAILED);
// Allocate enough TLS memory for all the GNU Linker (_tls_size)
// organized _Thread_local data, as well as Cosmopolitan Libc (64)
if (!(tls = calloc(1, _MEMZ))) return 0;
bottom = mem;
top = mem + totalsize;
// set up thread information block
tib = (cthread_t)(tls + _MEMZ - _TIBZ);
tib->self = tib;
tib->self2 = tib;
tib->err = 0;
tib->tid = -1;
memmove(tls, _tdata_start, _TLDZ);
td = (cthread_t)(top - sizeof(struct cthread_descriptor_t));
td->self = td;
td->self2 = td;
td->err = errno;
td->tid = gettid();
td->alloc.bottom = bottom;
td->alloc.top = top;
td->stack.bottom = GetStackAddr(0);
td->stack.top = td->stack.bottom + GetStackSize();
td->state = cthread_main;
// Initialize TLS with content of .tdata section
memmove((void *)((uintptr_t)td - (uintptr_t)_tls_size), _tdata_start,
(uintptr_t)_tdata_size);
// Set FS
__install_tls((char *)td);
if (out_tib) {
*out_tib = (char *)tib;
}
return tls;
}

113
libc/thread/spawn.c Normal file
View file

@ -0,0 +1,113 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/wait0.internal.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/clone.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/spawn.h"
#include "libc/thread/thread.h"
STATIC_YOINK("_main_thread_ctor");
/**
* @fileoverview Simple System Threads API
*/
#define _TLSZ ((intptr_t)_tls_size)
#define _TLDZ ((intptr_t)_tdata_size)
#define _TIBZ sizeof(struct cthread_descriptor_t)
#define _MEMZ ROUNDUP(_TLSZ + _TIBZ, alignof(struct cthread_descriptor_t))
/**
* Spawns thread.
*
* @param fun is thread worker callback, which receives `arg` and `ctid`
* @param arg shall be passed to `fun`
* @param opt_out_thread needn't be initialiized and is always clobbered
* except when it isn't specified, in which case, the thread is kind
* of detached and will leak in stack / tls memory
* @return 0 on success, or -1 w/ errno
*/
int _spawn(int fun(void *, int), void *arg, struct spawn *opt_out_thread) {
struct spawn *th, ths;
// we need to to clobber the output memory before calling clone, since
// there's no guarantee clone() won't suspend the parent, and focus on
// running the child instead; in that case child might want to read it
if (opt_out_thread) {
th = opt_out_thread;
} else {
th = &ths;
}
// Allocate enough TLS memory for all the GNU Linuker (_tls_size)
// organized _Thread_local data, as well as Cosmpolitan Libc (64)
if (!(th->tls = _mktls(&th->tib))) {
return -1;
}
th->ctid = (int *)(th->tib + 0x38);
// We must use _mapstack() to allocate the stack because OpenBSD has
// very strict requirements for what's allowed to be used for stacks
if (!(th->stk = _mapstack())) {
free(th->tls);
return -1;
}
if (clone(fun, th->stk, GetStackSize(),
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID,
arg, &th->ptid, th->tib, _TIBZ, th->ctid) == -1) {
_freestack(th->stk);
free(th->tls);
return -1;
}
return 0;
}
/**
 * Waits for thread created by _spawn() to terminate.
 *
 * The thread's stack and tls memory are freed too. The handle is
 * zeroed before returning; a handle whose `ctid` is already null is
 * just zeroed, without waiting.
 *
 * @param th is handle that was filled in by _spawn()
 * @return 0 if there was nothing to join, otherwise the result of
 *     munmap() on the thread's stack
 */
int _join(struct spawn *th) {
  int rc = 0;
  if (th->ctid) {
    // block until the child tid word becomes zero
    _wait0(th->ctid);
    // the thread is gone, so its memory can be released
    free(th->tls);
    rc = munmap(th->stk, GetStackSize());
  }
  bzero(th, sizeof(*th));
  return rc;
}

20
libc/thread/spawn.h Normal file
View file

@ -0,0 +1,20 @@
#ifndef COSMOPOLITAN_LIBC_THREAD_SPAWN_H_
#define COSMOPOLITAN_LIBC_THREAD_SPAWN_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
// Handle for a thread created by _spawn(), to be handed to _join().
struct spawn {
// parent tid slot; _spawn() passes its address to clone() along with
// CLONE_PARENT_SETTID
int ptid;
// child tid word; _spawn() points it at tib + 0x38, and _join() waits
// on it and treats null as "nothing to join"
int *ctid;
// stack obtained from _mapstack(); unmapped by _join()
char *stk;
// buffer returned by _mktls(); released with free()
char *tls;
// thread information block pointer stored by _mktls(); given to
// clone() as the CLONE_SETTLS argument
char *tib;
};
int _spawn(int (*)(void *, int), void *, struct spawn *) hidden;
int _join(struct spawn *) hidden;
char *_mktls(char **) hidden;
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_THREAD_SPAWN_H_ */

View file

@ -15,7 +15,6 @@ enum cthread_state {
cthread_joining = 1,
cthread_finished = 2,
cthread_detached = 4,
cthread_main = 127,
};
struct cthread_descriptor_t {