Make _Thread_local work across platforms

We now rewrite the binary image at runtime on Windows and XNU to change
mov %fs:0,%reg instructions to use %gs instead. This change also
introduces a simpler threading API, _spawn() and _join(), which has
replaced most clone() usage.
This commit is contained in:
Justine Tunney 2022-07-10 04:01:17 -07:00
parent e4d6e263d4
commit 5f4f6b0e69
51 changed files with 808 additions and 1043 deletions

View file

@ -47,6 +47,7 @@ TOOL_BUILD_DIRECTDEPS = \
LIBC_SYSV \
LIBC_SYSV_CALLS \
LIBC_TIME \
LIBC_THREAD \
LIBC_TINYMATH \
LIBC_UNICODE \
LIBC_X \

View file

@ -52,6 +52,7 @@
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/prot.h"
#include "libc/thread/spawn.h"
#include "libc/time/time.h"
#include "libc/x/x.h"
#include "third_party/getopt/getopt.h"
@ -122,11 +123,10 @@ struct Edges {
};
char *out;
char **tls;
int threads;
char **bouts;
char **stack;
unsigned counter;
struct spawn *th;
struct GetArgs ga;
struct Edges edges;
struct Sauce *sauces;
@ -248,7 +248,7 @@ wontreturn void OnMissingFile(const char *list, const char *src) {
exit(1);
}
int LoadRelationshipsWorker(void *arg) {
int LoadRelationshipsWorker(void *arg, int tid) {
int fd;
ssize_t rc;
bool skipme;
@ -307,18 +307,14 @@ void LoadRelationships(int argc, char *argv[]) {
int i;
getargs_init(&ga, argv + optind);
for (i = 0; i < threads; ++i) {
if (clone(LoadRelationshipsWorker, stack[i], GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
(void *)(intptr_t)i, 0, __initialize_tls(tls[i]), 64,
(int *)(tls[i] + 0x38)) == -1) {
if (_spawn(LoadRelationshipsWorker, (void *)(intptr_t)i, th + i) == -1) {
pthread_mutex_lock(&reportlock);
kprintf("error: clone(%d) failed %m\n", i);
exit(1);
}
}
for (i = 0; i < threads; ++i) {
_wait0((int *)(tls[i] + 0x38));
_join(th + i);
}
getargs_destroy(&ga);
}
@ -388,17 +384,17 @@ void Dive(char **bout, uint32_t *visited, unsigned id) {
}
}
int Diver(void *arg) {
int Diver(void *arg, int tid) {
char *bout = 0;
const char *path;
uint32_t *visited;
size_t i, visilen;
char pathbuf[PATH_MAX];
int tid = (intptr_t)arg;
int x = (intptr_t)arg;
visilen = (sources.i + sizeof(*visited) * CHAR_BIT - 1) /
(sizeof(*visited) * CHAR_BIT);
visited = malloc(visilen * sizeof(*visited));
for (i = tid; i < sources.i; i += threads) {
for (i = x; i < sources.i; i += threads) {
path = strings.p + sauces[i].name;
if (!IsObjectSource(path)) continue;
appendw(&bout, '\n');
@ -415,25 +411,21 @@ int Diver(void *arg) {
}
free(visited);
appendw(&bout, '\n');
bouts[tid] = bout;
bouts[x] = bout;
return 0;
}
void Explore(void) {
int i;
for (i = 0; i < threads; ++i) {
if (clone(Diver, stack[i], GetStackSize(),
CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
(void *)(intptr_t)i, 0, __initialize_tls(tls[i]), 64,
(int *)(tls[i] + 0x38)) == -1) {
if (_spawn(Diver, (void *)(intptr_t)i, th + i) == -1) {
pthread_mutex_lock(&reportlock);
kprintf("error: clone(%d) failed %m\n", i);
exit(1);
}
}
for (i = 0; i < threads; ++i) {
_wait0((int *)(tls[i] + 0x38));
_join(th + i);
}
}
@ -443,17 +435,8 @@ int main(int argc, char *argv[]) {
if (argc == 2 && !strcmp(argv[1], "-n")) exit(0);
GetOpts(argc, argv);
threads = GetCpuCount();
tls = calloc(threads, sizeof(*tls));
stack = calloc(threads, sizeof(*stack));
th = calloc(threads, sizeof(*th));
bouts = calloc(threads, sizeof(*bouts));
for (i = 0; i < threads; ++i) {
if (!(tls[i] = malloc(64)) ||
(stack[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)) == MAP_FAILED) {
kprintf("error: mmap(%d) failed %m\n", i);
exit(1);
}
}
LoadRelationships(argc, argv);
Crunch();
Explore();
@ -466,15 +449,12 @@ int main(int argc, char *argv[]) {
CHECK_NE(-1, close(fd));
CHECK_NE(-1, rename(path, out));
for (i = 0; i < threads; ++i) {
munmap(stack[i], GetStackSize());
free(bouts[i]);
free(tls[i]);
}
free(strings.p);
free(edges.p);
free(sauces);
free(stack);
free(bouts);
free(tls);
free(th);
return 0;
}

View file

@ -1,78 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/struct/sigaction.h"
#include "libc/intrin/spinlock.h"
#include "libc/intrin/wait0.internal.h"
#include "libc/log/log.h"
#include "libc/mem/mem.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/runtime/gc.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/runtime/sysconf.h"
#include "libc/sysv/consts/clone.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/sig.h"
#include "libc/time/time.h"
/* Set once SIGINT has been delivered; polled by the workers and by main. */
volatile bool gotctrlc;

/**
 * Signal handler that records that an interrupt arrived.
 *
 * @param sig is the delivered signal number (unused)
 */
void GotCtrlC(int sig) {
  (void)sig;
  gotctrlc = true;
}
int Worker(void *arg) {
uint8_t *p;
unsigned x = 0;
struct sigaction sa = {.sa_handler = GotCtrlC};
sigaction(SIGINT, &sa, 0);
for (;;) {
for (p = _base; p < _end; ++p) {
x += *p;
if (gotctrlc) {
return x | x >> 8 | x >> 16 | x >> 24;
}
}
}
}
/**
 * Starts one Worker thread per GetCpuCount() and waits for Ctrl-C.
 *
 * Each thread gets a 64-byte block passed through __initialize_tls()
 * and a freshly mmap'd stack of GetStackSize() bytes. The address at
 * offset 0x38 of each block is handed to clone() as its final argument
 * and is later passed to _wait0() to wait for that thread.
 *
 * The stack mappings are not retained, so they are never unmapped
 * here. Results of malloc(), mmap() and clone() are not checked.
 */
int main(int argc, char *argv[]) {
  int k, ncpu, prot, flags;
  char **blocks;
  void *stk;
  ShowCrashReports();
  ncpu = GetCpuCount();
  /* gc() presumably releases the array when main returns -- confirm. */
  blocks = gc(malloc(ncpu * sizeof(*blocks)));
  k = 0;
  while (k < ncpu) {
    prot = PROT_READ | PROT_WRITE;
    flags = MAP_STACK | MAP_ANONYMOUS;
    blocks[k] = __initialize_tls(malloc(64));
    stk = mmap(0, GetStackSize(), prot, flags, -1, 0);
    clone(Worker, stk, GetStackSize(),
          CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
              CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS,
          0, 0, blocks[k], 64, (int *)(blocks[k] + 0x38));
    ++k;
  }
  /* Poll the interrupt flag, sleeping 1000 microseconds between checks. */
  for (; !gotctrlc;) {
    usleep(1000);
  }
  /* Wait on each thread's word at offset 0x38, then free its block. */
  for (k = 0; k < ncpu; ++k) {
    _wait0((int *)(blocks[k] + 0x38));
    free(blocks[k]);
  }
}

View file

@ -49,6 +49,7 @@ TOOL_NET_DIRECTDEPS = \
LIBC_SYSV \
LIBC_SYSV_CALLS \
LIBC_TIME \
LIBC_THREAD \
LIBC_TINYMATH \
LIBC_UNICODE \
LIBC_X \

View file

@ -36,7 +36,6 @@
#include "libc/intrin/kprintf.h"
#include "libc/intrin/nomultics.internal.h"
#include "libc/intrin/spinlock.h"
#include "libc/intrin/wait0.internal.h"
#include "libc/log/check.h"
#include "libc/log/log.h"
#include "libc/macros.internal.h"
@ -86,6 +85,7 @@
#include "libc/sysv/consts/termios.h"
#include "libc/sysv/consts/w.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/spawn.h"
#include "libc/x/x.h"
#include "libc/zip.h"
#include "net/http/escape.h"
@ -421,7 +421,6 @@ static lua_State *GL;
static lua_State *YL;
static char *content;
static uint8_t *zmap;
static char *repltls;
static uint8_t *zbase;
static uint8_t *zcdir;
static size_t hdrsize;
@ -431,7 +430,6 @@ static char *replstack;
static reader_f reader;
static writer_f writer;
static char *extrahdrs;
static char *monitortls;
static char *luaheaderp;
static const char *zpath;
static const char *brand;
@ -454,6 +452,8 @@ static const char *launchbrowser;
static const char *referrerpolicy;
static ssize_t (*generator)(struct iovec[3]);
static struct spawn replth;
static struct spawn monitorth;
static struct Buffer inbuf_actual;
static struct Buffer inbuf;
static struct Buffer oldin;
@ -6461,7 +6461,7 @@ static int ExitWorker(void) {
}
if (monitortty) {
terminatemonitor = true;
_wait0((int *)(monitortls + 0x38));
_join(&monitorth);
}
_Exit(0);
}
@ -6482,7 +6482,7 @@ static int EnableSandbox(void) {
}
}
static int MemoryMonitor(void *arg) {
static int MemoryMonitor(void *arg, int tid) {
static struct termios oldterm;
static int tty;
sigset_t ss;
@ -6637,23 +6637,9 @@ static int MemoryMonitor(void *arg) {
}
static void MonitorMemory(void) {
if ((monitortls = malloc(64))) {
if ((monitorstack = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)) != MAP_FAILED) {
if (clone(MemoryMonitor, monitorstack, GetStackSize(),
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND | CLONE_SETTLS | CLONE_CHILD_SETTID |
CLONE_CHILD_CLEARTID,
0, 0, __initialize_tls(monitortls), 64,
(int *)(monitortls + 0x38)) != -1) {
return;
}
munmap(monitorstack, GetStackSize());
}
free(monitortls);
if (_spawn(MemoryMonitor, 0, &monitorth) == -1) {
WARNF("(memv) failed to start memory monitor %m");
}
WARNF("(memv) failed to start memory monitor %m");
monitortty = 0;
}
static int HandleConnection(size_t i) {
@ -7029,7 +7015,7 @@ static void ReplEventLoop(void) {
polls[0].fd = -1;
}
static int WindowsReplThread(void *arg) {
static int WindowsReplThread(void *arg, int tid) {
int sig;
lua_State *L = GL;
DEBUGF("(repl) started windows thread");
@ -7289,16 +7275,7 @@ void RedBean(int argc, char *argv[]) {
if (daemonize || uniprocess || !linenoiseIsTerminal()) {
EventLoop(HEARTBEAT);
} else if (IsWindows()) {
CHECK_NE(MAP_FAILED, (repltls = malloc(64)));
CHECK_NE(MAP_FAILED,
(replstack = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE,
MAP_STACK | MAP_ANONYMOUS, -1, 0)));
CHECK_NE(
-1,
clone(WindowsReplThread, replstack, GetStackSize(),
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
CLONE_SETTLS | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
0, 0, __initialize_tls(repltls), 64, (int *)(repltls + 0x38)));
CHECK_NE(-1, _spawn(WindowsReplThread, 0, &replth));
EventLoop(100);
} else {
ReplEventLoop();
@ -7315,19 +7292,11 @@ void RedBean(int argc, char *argv[]) {
}
if (!isexitingworker) {
if (!IsTiny()) {
if (monitortty) {
terminatemonitor = true;
_wait0((int *)(monitortls + 0x38));
munmap(monitorstack, GetStackSize());
free(monitortls);
}
terminatemonitor = true;
_join(&monitorth);
}
#ifndef STATIC
if (repltls) {
_wait0((int *)(repltls + 0x38));
munmap(replstack, GetStackSize());
free(repltls);
}
_join(&replth);
#endif
}
if (!isexitingworker) {
@ -7349,11 +7318,9 @@ int main(int argc, char *argv[]) {
return 0;
CloseServerFds();
}
if (repltls) {
free(repltls);
linenoiseDisableRawMode();
linenoiseHistoryFree();
}
_join(&replth);
linenoiseDisableRawMode();
linenoiseHistoryFree();
}
CheckForMemoryLeaks();
}

View file

@ -104,7 +104,7 @@ int y_; /* -y HEIGHT [in flexidecimal] */
#define Mode BEST
#if Mode == BEST
#define MC 9u /* log2(#) of color combos to consider */
#define MC 9u /* log2(#) of color combos to consider */
#define GN 35u /* # of glyphs to consider */
#elif Mode == FAST
#define MC 6u
@ -114,10 +114,10 @@ int y_; /* -y HEIGHT [in flexidecimal] */
#define GN 25u
#endif
#define CN 3u /* # channels (rgb) */
#define YS 8u /* row stride -or- block height */
#define XS 4u /* column stride -or- block width */
#define GT 44u /* total glyphs */
#define CN 3u /* # channels (rgb) */
#define YS 8u /* row stride -or- block height */
#define XS 4u /* column stride -or- block width */
#define GT 44u /* total glyphs */
#define BN (YS * XS) /* # scalars in block/glyph plane */
#define PHIPRIME 0x9E3779B1u
@ -434,7 +434,7 @@ static void PrintImage(unsigned yn, unsigned xn,
char *v, *vt;
size = yn * (xn * (32 + (2 + (1 + 3) * 3) * 2 + 1 + 3)) * 1 + 5 + 1;
size = ROUNDUP(size, FRAMESIZE);
CHECK_NE(MAP_FAILED, (vt = mapanon(size)));
CHECK_NOTNULL((vt = _mapanon(size)));
v = RenderImage(vt, yn, xn, rgb);
*v++ = '\r';
*v++ = 033;
@ -532,8 +532,8 @@ static void LoadFile(const char *path, size_t yn, size_t xn, void *rgb) {
CHECK_EQ(CN, 3);
data2size = ROUNDUP(sizeof(float) * goty * gotx * CN, FRAMESIZE);
data3size = ROUNDUP(sizeof(float) * yn * YS * xn * XS * CN, FRAMESIZE);
CHECK_NE(MAP_FAILED, (data2 = mapanon(data2size)));
CHECK_NE(MAP_FAILED, (data3 = mapanon(data3size)));
CHECK_NOTNULL((data2 = _mapanon(data2size)));
CHECK_NOTNULL((data3 = _mapanon(data3size)));
rgb2lin(goty * gotx * CN, data2, data);
lanczos3(yn * YS, xn * XS, data3, goty, gotx, data2, gotx * 3);
rgb2std(yn * YS * xn * XS * CN, rgb, data3);
@ -603,7 +603,7 @@ int main(int argc, char *argv[]) {
// FIXME: on the conversion stage should do 2Y because of halfblocks
// printf( "filename >%s<\tx >%d<\ty >%d<\n\n", filename, x_, y_);
size = y_ * YS * x_ * XS * CN;
CHECK_NE(MAP_FAILED, (rgb = mapanon(ROUNDUP(size, FRAMESIZE))));
CHECK_NOTNULL((rgb = _mapanon(ROUNDUP(size, FRAMESIZE))));
for (i = optind; i < argc; ++i) {
if (!argv[i]) continue;
if (m_) {

View file

@ -37,7 +37,7 @@ forceinline void ConvolveGradient(unsigned yn, unsigned xn,
size_t size;
unsigned y, x, i, j, k;
float py[4], px[4], (*tmp)[yn][xn][4];
tmp = mapanon((size = ROUNDUP(sizeof(float) * 4 * xn * yn, FRAMESIZE)));
tmp = _mapanon((size = ROUNDUP(sizeof(float) * 4 * xn * yn, FRAMESIZE)));
for (y = 0; y < yn - KW + 1; ++y) {
for (x = 0; x < xn - KW + 1; ++x) {
for (k = 0; k < 4; ++k) py[k] = 0;

View file

@ -751,7 +751,7 @@ static void RasterIt(void) {
static bool once;
static void *buf;
if (!once) {
buf = mapanon(ROUNDUP(fb0_.size, FRAMESIZE));
buf = _mapanon(ROUNDUP(fb0_.size, FRAMESIZE));
once = true;
}
WriteToFrameBuffer(fb0_.vscreen.yres_virtual, fb0_.vscreen.xres_virtual, buf,