diff --git a/examples/greenbean.c b/examples/greenbean.c index fe5ac739c..932cb1560 100644 --- a/examples/greenbean.c +++ b/examples/greenbean.c @@ -18,6 +18,7 @@ #include "libc/fmt/conv.h" #include "libc/fmt/itoa.h" #include "libc/intrin/kprintf.h" +#include "libc/intrin/threaded.h" #include "libc/log/check.h" #include "libc/log/log.h" #include "libc/macros.internal.h" @@ -120,6 +121,7 @@ int Worker(void *id) { setsockopt(server, SOL_SOCKET, SO_REUSEPORT, &yes, sizeof(yes)); setsockopt(server, SOL_TCP, TCP_FASTOPEN, &yes, sizeof(yes)); setsockopt(server, SOL_TCP, TCP_QUICKACK, &yes, sizeof(yes)); + errno = 0; if (bind(server, &addr, sizeof(addr)) == -1) { if (LOGGING) kprintf("%s() failed %m\n", "socket"); @@ -265,11 +267,13 @@ int main(int argc, char *argv[]) { if (!threads) threads = GetCpuCount(); workers = threads; for (i = 0; i < threads; ++i) { + char *tls = __initialize_tls(malloc(64)); void *stack = mmap(0, 65536, PROT_READ | PROT_WRITE, MAP_STACK | MAP_ANONYMOUS, -1, 0); - clone(Worker, stack, 65536, - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND, - (void *)(intptr_t)i, 0, 0, 0, 0); + CHECK_NE(-1, clone(Worker, stack, 65536, + CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_SETTLS, + (void *)(intptr_t)i, 0, tls, 64, 0)); } status = ""; while (workers) { diff --git a/libc/calls/close.c b/libc/calls/close.c index 13cfe7b21..ef1a248fe 100644 --- a/libc/calls/close.c +++ b/libc/calls/close.c @@ -20,6 +20,8 @@ #include "libc/calls/calls.h" #include "libc/calls/internal.h" #include "libc/calls/strace.internal.h" +#include "libc/intrin/kprintf.h" +#include "libc/intrin/spinlock.h" #include "libc/macros.internal.h" #include "libc/sock/internal.h" #include "libc/sysv/errfuns.h" @@ -46,6 +48,7 @@ */ int close(int fd) { int rc; + _spinlock(&__fds_lock); if (fd == -1) { rc = 0; } else if (fd < 0) { @@ -74,9 +77,10 @@ int close(int fd) { } } if (!__vforked) { - __releasefd(fd); + __releasefd_unlocked(fd); } } + 
_spunlock(&__fds_lock); STRACE("%s(%d) → %d% m", "close", fd, rc); return rc; } diff --git a/libc/calls/dup-nt.c b/libc/calls/dup-nt.c index ba52b9e20..865e28025 100644 --- a/libc/calls/dup-nt.c +++ b/libc/calls/dup-nt.c @@ -20,6 +20,7 @@ #include "libc/bits/weaken.h" #include "libc/calls/calls.h" #include "libc/calls/internal.h" +#include "libc/intrin/spinlock.h" #include "libc/mem/mem.h" #include "libc/nt/files.h" #include "libc/nt/runtime.h" @@ -32,25 +33,37 @@ * Implements dup(), dup2(), dup3(), and F_DUPFD for Windows. */ textwindows int sys_dup_nt(int oldfd, int newfd, int flags, int start) { - int64_t proc, handle; + int64_t rc, proc, handle; // validate the api usage if (oldfd < 0) return einval(); if (flags & ~O_CLOEXEC) return einval(); + + _spinlock(&__fds_lock); + if (oldfd >= g_fds.n || (g_fds.p[oldfd].kind != kFdFile && g_fds.p[oldfd].kind != kFdSocket && g_fds.p[oldfd].kind != kFdConsole)) { + _spunlock(&__fds_lock); return ebadf(); } // allocate a new file descriptor if (newfd == -1) { - if ((newfd = __reservefd(start)) == -1) { + if ((newfd = __reservefd_unlocked(start)) == -1) { + _spunlock(&__fds_lock); return -1; } } else { - if (__ensurefds(newfd) == -1) return -1; - if (g_fds.p[newfd].kind) close(newfd); + if (__ensurefds_unlocked(newfd) == -1) { + _spunlock(&__fds_lock); + return -1; + } + if (g_fds.p[newfd].kind) { + _spunlock(&__fds_lock); + close(newfd); + _spinlock(&__fds_lock); + } g_fds.p[newfd].kind = kFdReserved; } @@ -80,9 +93,12 @@ textwindows int sys_dup_nt(int oldfd, int newfd, int flags, int start) { if (g_fds.p[oldfd].worker) { g_fds.p[newfd].worker = weaken(RefNtStdinWorker)(g_fds.p[oldfd].worker); } - return newfd; + rc = newfd; } else { - __releasefd(newfd); - return __winerr(); + __releasefd_unlocked(newfd); /* __fds_lock is already held here */ + rc = __winerr(); } + + _spunlock(&__fds_lock); + return rc; } diff --git a/libc/calls/g_sighandrvas.c b/libc/calls/g_sighandrvas.c index 97964a95d..e7b259cb7 100644 --- a/libc/calls/g_sighandrvas.c +++ b/libc/calls/g_sighandrvas.c @@ -18,6 +18,6 @@ 
╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/internal.h" -_Alignas(64) char __sig_lock; +_Alignas(64) int __sig_lock; unsigned __sighandrvas[NSIG]; unsigned __sighandflags[NSIG]; diff --git a/libc/calls/internal.h b/libc/calls/internal.h index e656c947b..5c4457c67 100644 --- a/libc/calls/internal.h +++ b/libc/calls/internal.h @@ -76,8 +76,8 @@ struct Fds { extern const struct Fd kEmptyFd; hidden extern int __vforked; -hidden extern char __fds_lock; -hidden extern char __sig_lock; +hidden extern int __fds_lock; +hidden extern int __sig_lock; hidden extern bool __time_critical; hidden extern unsigned __sighandrvas[NSIG]; hidden extern unsigned __sighandflags[NSIG]; @@ -85,8 +85,11 @@ hidden extern struct Fds g_fds; hidden extern const struct NtSecurityAttributes kNtIsInheritable; int __reservefd(int) hidden; +int __reservefd_unlocked(int) hidden; void __releasefd(int) hidden; +void __releasefd_unlocked(int) hidden; int __ensurefds(int) hidden; +int __ensurefds_unlocked(int) hidden; int64_t __getfdhandleactual(int) hidden; void __printfds(void) hidden; diff --git a/libc/calls/interrupts-nt.c b/libc/calls/interrupts-nt.c index a77b28bfd..b64e78198 100644 --- a/libc/calls/interrupts-nt.c +++ b/libc/calls/interrupts-nt.c @@ -23,11 +23,16 @@ #include "libc/calls/strace.internal.h" #include "libc/calls/struct/sigaction.h" #include "libc/dce.h" +#include "libc/intrin/spinlock.h" textwindows bool _check_interrupts(bool restartable, struct Fd *fd) { + bool res; if (__time_critical) return false; + if (_trylock(&__fds_lock)) return false; if (weaken(_check_sigalrm)) weaken(_check_sigalrm)(); if (weaken(_check_sigchld)) weaken(_check_sigchld)(); if (fd && weaken(_check_sigwinch)) weaken(_check_sigwinch)(fd); - return weaken(__sig_check) && weaken(__sig_check)(restartable); + res = weaken(__sig_check) && weaken(__sig_check)(restartable); + _spunlock(&__fds_lock); + return res; } diff --git a/libc/calls/open-nt.c 
b/libc/calls/open-nt.c index 1955a5d18..706edddf3 100644 --- a/libc/calls/open-nt.c +++ b/libc/calls/open-nt.c @@ -20,6 +20,7 @@ #include "libc/calls/internal.h" #include "libc/calls/ntmagicpaths.internal.h" #include "libc/calls/strace.internal.h" +#include "libc/intrin/spinlock.h" #include "libc/nt/createfile.h" #include "libc/nt/enum/accessmask.h" #include "libc/nt/enum/creationdisposition.h" @@ -87,14 +88,17 @@ textwindows ssize_t sys_open_nt(int dirfd, const char *file, uint32_t flags, int32_t mode) { int fd; ssize_t rc; - if ((fd = __reservefd(-1)) == -1) return -1; - if ((flags & O_ACCMODE) == O_RDWR && !strcmp(file, kNtMagicPaths.devtty)) { - rc = sys_open_nt_console(dirfd, &kNtMagicPaths, flags, mode, fd); - } else { - rc = sys_open_nt_file(dirfd, file, flags, mode, fd); - } - if (rc == -1) { - __releasefd(fd); + _spinlock(&__fds_lock); + if ((rc = fd = __reservefd_unlocked(-1)) != -1) { + if ((flags & O_ACCMODE) == O_RDWR && !strcmp(file, kNtMagicPaths.devtty)) { + rc = sys_open_nt_console(dirfd, &kNtMagicPaths, flags, mode, fd); + } else { + rc = sys_open_nt_file(dirfd, file, flags, mode, fd); + } + if (rc == -1) { + __releasefd_unlocked(fd); + } } + _spunlock(&__fds_lock); /* must also run when no fd could be reserved */ return rc; } diff --git a/libc/calls/pipe-nt.c b/libc/calls/pipe-nt.c index 0c0db0823..fde3fa4a8 100644 --- a/libc/calls/pipe-nt.c +++ b/libc/calls/pipe-nt.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/internal.h" +#include "libc/intrin/spinlock.h" #include "libc/nt/createfile.h" #include "libc/nt/enum/accessmask.h" #include "libc/nt/enum/creationdisposition.h" @@ -33,9 +34,14 @@ textwindows int sys_pipe_nt(int pipefd[2], unsigned flags) { int reader, writer; char16_t pipename[64]; CreatePipeName(pipename); - if ((reader = __reservefd(-1)) == -1) return -1; - if ((writer = __reservefd(-1)) == -1) { - __releasefd(reader); + _spinlock(&__fds_lock); + if ((reader = __reservefd_unlocked(-1)) == -1) { + _spunlock(&__fds_lock); + return -1; + } + if ((writer = __reservefd_unlocked(-1)) == -1) { + __releasefd_unlocked(reader); + _spunlock(&__fds_lock); return -1; } if (~flags & O_DIRECT) { @@ -58,12 +64,14 @@ textwindows int sys_pipe_nt(int pipefd[2], unsigned flags) { g_fds.p[writer].handle = hout; pipefd[0] = reader; pipefd[1] = writer; + _spunlock(&__fds_lock); return 0; } else { CloseHandle(hin); } } - __releasefd(writer); - __releasefd(reader); + __releasefd_unlocked(writer); + __releasefd_unlocked(reader); + _spunlock(&__fds_lock); return -1; } diff --git a/libc/calls/poll-nt.c b/libc/calls/poll-nt.c index b8664c48a..a9536e4c1 100644 --- a/libc/calls/poll-nt.c +++ b/libc/calls/poll-nt.c @@ -44,7 +44,7 @@ #include "libc/sysv/consts/sig.h" #include "libc/sysv/errfuns.h" -_Alignas(64) static char poll_lock; +_Alignas(64) static int poll_lock; /** * Polls on the New Technology. diff --git a/libc/calls/reservefd.c b/libc/calls/reservefd.c index a68a56e7c..fd266d361 100644 --- a/libc/calls/reservefd.c +++ b/libc/calls/reservefd.c @@ -32,10 +32,9 @@ /** * Grows file descriptor array memory if needed. 
*/ -int __ensurefds(int fd) { +int __ensurefds_unlocked(int fd) { size_t n1, n2; struct Fd *p1, *p2; - _spinlock(&__fds_lock); n1 = g_fds.n; if (fd >= n1) { STRACE("__ensurefds(%d) extending", fd); @@ -48,7 +47,7 @@ int __ensurefds(int fd) { g_fds.p = p2; g_fds.n = n2; if (p1 != g_fds.__init_p) { - weaken(free)(p1); + __cxa_atexit(free, p1, 0); } } else { fd = enomem(); @@ -57,32 +56,44 @@ int __ensurefds(int fd) { fd = emfile(); } } + return fd; +} + +/** + * Grows file descriptor array memory if needed. + */ +int __ensurefds(int fd) { + _spinlock(&__fds_lock); + fd = __ensurefds_unlocked(fd); _spunlock(&__fds_lock); return fd; } +/** + * Finds open file descriptor slot; caller must hold __fds_lock. + */ +int __reservefd_unlocked(int start) { + int fd; + for (fd = start < 0 ? g_fds.f : start; fd < g_fds.n; ++fd) { + if (!g_fds.p[fd].kind) { + break; + } + } + if ((fd = __ensurefds_unlocked(fd)) == -1) return -1; /* enomem/emfile */ + bzero(g_fds.p + fd, sizeof(*g_fds.p)); + g_fds.p[fd].kind = kFdReserved; + return fd; +} + /** * Finds open file descriptor slot. */ int __reservefd(int start) { int fd; - for (;;) { - _spinlock(&__fds_lock); - fd = start < 0 ? 
g_fds.f : start; - while (fd < g_fds.n && g_fds.p[fd].kind) ++fd; - if (fd < g_fds.n) { - g_fds.f = fd + 1; - bzero(g_fds.p + fd, sizeof(*g_fds.p)); - g_fds.p[fd].kind = kFdReserved; - _spunlock(&__fds_lock); - return fd; - } else { - _spunlock(&__fds_lock); - if (__ensurefds(fd) == -1) { - return -1; - } - } - } + _spinlock(&__fds_lock); + fd = __reservefd_unlocked(start); + _spunlock(&__fds_lock); + return fd; } /** @@ -91,9 +102,12 @@ int __reservefd(int start) { static void FreeFds(void) { int i; NTTRACE("FreeFds()"); + _spinlock(&__fds_lock); for (i = 3; i < g_fds.n; ++i) { if (g_fds.p[i].kind) { + _spunlock(&__fds_lock); close(i); + _spinlock(&__fds_lock); } } if (g_fds.p != g_fds.__init_p) { @@ -102,6 +116,7 @@ static void FreeFds(void) { g_fds.p = g_fds.__init_p; g_fds.n = ARRAYLEN(g_fds.__init_p); } + _spunlock(&__fds_lock); } static textstartup void FreeFdsInit(void) { diff --git a/libc/calls/sigaction.c b/libc/calls/sigaction.c index eb36e030d..73f8fd4d3 100644 --- a/libc/calls/sigaction.c +++ b/libc/calls/sigaction.c @@ -223,7 +223,6 @@ static int __sigaction(int sig, const struct sigaction *act, rc = 0; } if (rc != -1 && !__vforked) { - _spinlock(&__sig_lock); if (oldact) { oldrva = __sighandrvas[sig]; oldact->sa_sigaction = (sigaction_f)( @@ -233,7 +232,6 @@ static int __sigaction(int sig, const struct sigaction *act, __sighandrvas[sig] = rva; __sighandflags[sig] = act->sa_flags; } - _spunlock(&__sig_lock); } return rc; } @@ -447,7 +445,9 @@ int sigaction(int sig, const struct sigaction *act, struct sigaction *oldact) { if (sig == SIGKILL || sig == SIGSTOP) { rc = einval(); } else { + _spinlock(&__sig_lock); rc = __sigaction(sig, act, oldact); + _spunlock(&__sig_lock); } STRACE("sigaction(%G, %s, [%s]) → %d% m", sig, DescribeSigaction(buf[0], sizeof(buf[0]), 0, act), diff --git a/libc/calls/sigchld-nt.c b/libc/calls/sigchld-nt.c index 6d8b7a073..8ca410c0d 100644 --- a/libc/calls/sigchld-nt.c +++ b/libc/calls/sigchld-nt.c @@ -53,7 +53,7 @@ void 
_check_sigchld(void) { if (__sighandflags[SIGCHLD] & SA_NOCLDWAIT) { STRACE("SIGCHILD SA_NOCLDWAIT fd=%d handle=%ld", pids[i], handles[i]); CloseHandle(handles[i]); - __releasefd(pids[i]); + __releasefd_unlocked(pids[i]); } g_fds.p[pids[i]].zombie = true; __sig_add(SIGCHLD, CLD_EXITED); diff --git a/libc/calls/wait4-nt.c b/libc/calls/wait4-nt.c index 14e00a01b..0d4fc2f5e 100644 --- a/libc/calls/wait4-nt.c +++ b/libc/calls/wait4-nt.c @@ -25,6 +25,7 @@ #include "libc/calls/struct/rusage.h" #include "libc/fmt/conv.h" #include "libc/intrin/kprintf.h" +#include "libc/intrin/spinlock.h" #include "libc/macros.internal.h" #include "libc/nt/accounting.h" #include "libc/nt/enum/accessmask.h" @@ -57,6 +58,7 @@ static textwindows int sys_wait4_nt_impl(int pid, int *opt_out_wstatus, struct NtProcessMemoryCountersEx memcount; struct NtFileTime createfiletime, exitfiletime, kernelfiletime, userfiletime; if (_check_interrupts(true, g_fds.p)) return eintr(); + _spinlock(&__fds_lock); if (pid != -1 && pid != 0) { if (pid < 0) { /* XXX: this is sloppy */ @@ -67,15 +69,17 @@ static textwindows int sys_wait4_nt_impl(int pid, int *opt_out_wstatus, if (!__isfdopen(pid) && (handle = OpenProcess(kNtSynchronize | kNtProcessQueryInformation, true, pid))) { - if ((pid = __reservefd(-1)) != -1) { + if ((pid = __reservefd_unlocked(-1)) != -1) { g_fds.p[pid].kind = kFdProcess; g_fds.p[pid].handle = handle; g_fds.p[pid].flags = O_CLOEXEC; } else { + _spunlock(&__fds_lock); CloseHandle(handle); return echild(); } } else { + _spunlock(&__fds_lock); return echild(); } } @@ -84,8 +88,12 @@ static textwindows int sys_wait4_nt_impl(int pid, int *opt_out_wstatus, count = 1; } else { count = __sample_pids(pids, handles, false); - if (!count) return echild(); + if (!count) { + _spunlock(&__fds_lock); + return echild(); + } } + _spunlock(&__fds_lock); for (;;) { if (_check_interrupts(true, 0)) return eintr(); dwExitCode = kNtStillActive; diff --git a/libc/intrin/asan.c b/libc/intrin/asan.c index 
c6145da37..d88aaa3db 100644 --- a/libc/intrin/asan.c +++ b/libc/intrin/asan.c @@ -27,9 +27,11 @@ #include "libc/dce.h" #include "libc/intrin/asan.internal.h" #include "libc/intrin/asancodes.h" +#include "libc/intrin/cmpxchg.h" #include "libc/intrin/kprintf.h" #include "libc/intrin/lockcmpxchg.h" #include "libc/intrin/nomultics.internal.h" +#include "libc/intrin/spinlock.h" #include "libc/log/backtrace.internal.h" #include "libc/log/internal.h" #include "libc/log/libfatal.internal.h" @@ -154,7 +156,8 @@ struct ReportOriginHeap { int z; }; -bool __asan_noreentry; +static int __asan_noreentry; +_Alignas(64) static int __asan_lock; static struct AsanMorgue __asan_morgue; #define __asan_unreachable() \ @@ -835,30 +838,27 @@ dontdiscard __asan_die_f *__asan_report_memory_fault(void *addr, int size, } void *__asan_morgue_add(void *p) { + int i; void *r; - int i, j; - for (;;) { - i = __asan_morgue.i; - j = (i + 1) & (ARRAYLEN(__asan_morgue.p) - 1); - if (_lockcmpxchg(&__asan_morgue.i, i, j)) { - r = __asan_morgue.p[i]; - __asan_morgue.p[i] = p; - return r; - } - } + _spinlock_optimistic(&__asan_lock); + i = __asan_morgue.i++ & (ARRAYLEN(__asan_morgue.p) - 1); + r = __asan_morgue.p[i]; + __asan_morgue.p[i] = p; + _spunlock(&__asan_lock); + return r; } static void __asan_morgue_flush(void) { int i; void *p; + _spinlock_optimistic(&__asan_lock); for (i = 0; i < ARRAYLEN(__asan_morgue.p); ++i) { - p = __asan_morgue.p[i]; - if (_lockcmpxchg(__asan_morgue.p + i, p, 0)) { - if (weaken(dlfree)) { - weaken(dlfree)(p); - } + if (weaken(dlfree)) { + weaken(dlfree)(__asan_morgue.p[i]); } + __asan_morgue.p[i] = 0; } + _spunlock(&__asan_lock); } static size_t __asan_user_size(size_t n) { @@ -1197,12 +1197,13 @@ void __asan_evil(uint8_t *addr, int size, const char *s1, const char *s2) { struct AsanTrace tr; __asan_rawtrace(&tr, __builtin_frame_address(0)); kprintf( - "WARNING: ASAN error during %s bad %d byte %s at %x bt %x %x %x %x %x\n", + "WARNING: ASAN %s %s bad %d byte %s at %x bt 
%x %x %x %x %x\n", + __asan_noreentry == gettid() ? "error during" : "multi-threaded crash", s1, size, s2, addr, tr.p[0], tr.p[1], tr.p[2], tr.p[3], tr.p[4], tr.p[5]); } void __asan_report_load(uint8_t *addr, int size) { - if (_lockcmpxchg(&__asan_noreentry, false, true)) { + if (_lockcmpxchg(&__asan_noreentry, 0, gettid())) { if (!__vforked) { __asan_report_memory_fault(addr, size, "load")(); __asan_unreachable(); @@ -1215,7 +1216,7 @@ void __asan_report_load(uint8_t *addr, int size) { } void __asan_report_store(uint8_t *addr, int size) { - if (_lockcmpxchg(&__asan_noreentry, false, true)) { + if (_lockcmpxchg(&__asan_noreentry, 0, gettid())) { if (!__vforked) { __asan_report_memory_fault(addr, size, "store")(); __asan_unreachable(); diff --git a/libc/intrin/asan.internal.h b/libc/intrin/asan.internal.h index 6bcbb2700..d74411c85 100644 --- a/libc/intrin/asan.internal.h +++ b/libc/intrin/asan.internal.h @@ -16,8 +16,6 @@ struct AsanFault { const signed char *shadow; }; -extern bool __asan_noreentry; - void __asan_unpoison(long, long); void __asan_poison(long, long, signed char); void __asan_verify(const void *, size_t); diff --git a/libc/intrin/cxaatexit.c b/libc/intrin/cxaatexit.c index a71602958..7fb83e8a0 100644 --- a/libc/intrin/cxaatexit.c +++ b/libc/intrin/cxaatexit.c @@ -19,6 +19,7 @@ #include "libc/assert.h" #include "libc/bits/weaken.h" #include "libc/calls/strace.internal.h" +#include "libc/intrin/spinlock.h" #include "libc/macros.internal.h" #include "libc/mem/mem.h" #include "libc/nexgen32e/bsr.h" @@ -28,6 +29,8 @@ STATIC_YOINK("__cxa_finalize"); +static int __cxa_lock; + /** * Adds global destructor. 
* @@ -47,6 +50,7 @@ noasan int __cxa_atexit(void *fp, void *arg, void *pred) { unsigned i; struct CxaAtexitBlock *b, *b2; _Static_assert(ATEXIT_MAX == CHAR_BIT * sizeof(b->mask), ""); + _spinlock(&__cxa_lock); b = __cxa_blocks.p; if (!b) b = __cxa_blocks.p = &__cxa_blocks.root; if (!~b->mask) { @@ -55,6 +59,7 @@ noasan int __cxa_atexit(void *fp, void *arg, void *pred) { b2->next = b; __cxa_blocks.p = b = b2; } else { + _spunlock(&__cxa_lock); return enomem(); } } @@ -64,5 +69,6 @@ noasan int __cxa_atexit(void *fp, void *arg, void *pred) { b->p[i].fp = fp; b->p[i].arg = arg; b->p[i].pred = pred; + _spunlock(&__cxa_lock); return 0; } diff --git a/libc/intrin/exit1.greg.c b/libc/intrin/exit1.greg.c index 90e43e5f1..084379892 100644 --- a/libc/intrin/exit1.greg.c +++ b/libc/intrin/exit1.greg.c @@ -19,7 +19,6 @@ #include "libc/calls/strace.internal.h" #include "libc/dce.h" #include "libc/intrin/setjmp.internal.h" -#include "libc/intrin/winthread.internal.h" #include "libc/nt/thread.h" #include "libc/runtime/runtime.h" #include "libc/sysv/consts/nr.h" @@ -34,7 +33,7 @@ */ privileged wontreturn void _Exit1(int rc) { struct WinThread *wt; - /* STRACE("_Exit1(%d)", rc); */ + STRACE("_Exit1(%d)", rc); if (!IsWindows() && !IsMetal()) { register long r10 asm("r10") = 0; asm volatile("syscall" diff --git a/libc/intrin/g_fds.c b/libc/intrin/g_fds.c index d071e00b0..9f575dcd6 100644 --- a/libc/intrin/g_fds.c +++ b/libc/intrin/g_fds.c @@ -25,7 +25,7 @@ STATIC_YOINK("_init_g_fds"); struct Fds g_fds; -_Alignas(64) char __fds_lock; +_Alignas(64) int __fds_lock; textstartup void InitializeFileDescriptors(void) { struct Fds *fds; diff --git a/libc/intrin/gettid.greg.c b/libc/intrin/gettid.greg.c index 28c5cc548..2db48b313 100644 --- a/libc/intrin/gettid.greg.c +++ b/libc/intrin/gettid.greg.c @@ -18,7 +18,7 @@ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/calls.h" #include "libc/dce.h" -#include "libc/intrin/tls.h" +#include 
"libc/intrin/threaded.h" #include "libc/nt/thread.h" /** diff --git a/libc/intrin/kprintf.greg.c b/libc/intrin/kprintf.greg.c index 1915f8afb..2bcc92fe0 100644 --- a/libc/intrin/kprintf.greg.c +++ b/libc/intrin/kprintf.greg.c @@ -31,7 +31,7 @@ #include "libc/intrin/lockcmpxchg.h" #include "libc/intrin/nomultics.internal.h" #include "libc/intrin/spinlock.h" -#include "libc/intrin/threaded.internal.h" +#include "libc/intrin/threaded.h" #include "libc/limits.h" #include "libc/log/internal.h" #include "libc/macros.internal.h" diff --git a/libc/intrin/releasefd.c b/libc/intrin/releasefd.c index 3b9410d36..660e24fcd 100644 --- a/libc/intrin/releasefd.c +++ b/libc/intrin/releasefd.c @@ -20,11 +20,15 @@ #include "libc/intrin/spinlock.h" #include "libc/macros.internal.h" -void __releasefd(int fd) { - _spinlock(&__fds_lock); +void __releasefd_unlocked(int fd) { if (0 <= fd && fd < g_fds.n) { g_fds.p[fd].kind = 0; g_fds.f = MIN(fd, g_fds.f); } +} + +void __releasefd(int fd) { + _spinlock(&__fds_lock); + __releasefd_unlocked(fd); _spunlock(&__fds_lock); } diff --git a/libc/intrin/spinlock.h b/libc/intrin/spinlock.h index ea18cf58d..e85051b36 100644 --- a/libc/intrin/spinlock.h +++ b/libc/intrin/spinlock.h @@ -2,16 +2,26 @@ #define COSMOPOLITAN_LIBC_INTRIN_SPINLOCK_H_ #include "libc/bits/weaken.h" #include "libc/calls/calls.h" +#include "libc/dce.h" #include "libc/intrin/kprintf.h" +#include "libc/intrin/lockcmpxchg.h" #include "libc/log/backtrace.internal.h" #include "libc/log/log.h" +#include "libc/runtime/symbols.internal.h" + +#if IsModeDbg() && !defined(_SPINLOCK_DEBUG) +#define _SPINLOCK_DEBUG +#endif #if defined(_SPINLOCK_DEBUG) -#define _spinlock(lock) _spinlock_debug(lock) +#define _spinlock(lock) _spinlock_ndebug(lock) +#define _spinlock_ndebug(lock) _spinlock_optimistic(lock) #elif defined(TINY) -#define _spinlock(lock) _spinlock_tiny(lock) +#define _spinlock(lock) _spinlock_tiny(lock) +#define _spinlock_ndebug(lock) _spinlock_tiny(lock) #else -#define 
_spinlock(lock) _spinlock_optimistic(lock) +#define _spinlock(lock) _spinlock_optimistic(lock) +#define _spinlock_ndebug(lock) _spinlock_optimistic(lock) #endif #define _spunlock(lock) __atomic_clear(lock, __ATOMIC_RELAXED) @@ -44,22 +54,26 @@ } \ } while (0) -#define _spinlock_debug(lock) \ - do { \ - typeof(*(lock)) me, owner; \ - me = gettid(); \ - if (_trylock(lock)) { \ - __atomic_load(lock, &owner, __ATOMIC_RELAXED); \ - if (owner == me) { \ - kprintf("%s:%d: warning: possible spinlock re-entry in %s()\n", \ - __FILE__, __LINE__, __FUNCTION__); \ - if (weaken(ShowBacktrace)) { \ - weaken(ShowBacktrace)(2, 0); \ - } \ - } \ - _spinlock_optimistic(lock); \ - } \ - *lock = me; \ +#define _spinlock_debug(lock) \ + do { \ + typeof(*(lock)) me, owner; \ + unsigned long warntries = 10000000; \ + me = gettid(); \ + if (!_lockcmpxchg(lock, 0, me)) { \ + __atomic_load(lock, &owner, __ATOMIC_RELAXED); \ + if (owner == me) { \ + kprintf("%s:%d: warning: possible re-entry on %s in %s()\n", __FILE__, \ + __LINE__, #lock, __FUNCTION__); \ + } \ + while (!_lockcmpxchg(lock, 0, me)) { \ + if (!--warntries) { \ + warntries = -1; \ + kprintf("%s:%d: warning: possible deadlock on %s in %s()\n", \ + __FILE__, __LINE__, #lock, __FUNCTION__); \ + } \ + __builtin_ia32_pause(); \ + } \ + } \ } while (0) #endif /* COSMOPOLITAN_LIBC_INTRIN_SPINLOCK_H_ */ diff --git a/libc/intrin/threaded.h b/libc/intrin/threaded.h new file mode 100644 index 000000000..1839a3e06 --- /dev/null +++ b/libc/intrin/threaded.h @@ -0,0 +1,16 @@ +#ifndef COSMOPOLITAN_LIBC_INTRIN_THREADED_H_ +#define COSMOPOLITAN_LIBC_INTRIN_THREADED_H_ +#if !(__ASSEMBLER__ + __LINKER__ + 0) +COSMOPOLITAN_C_START_ + +extern bool __threaded; +extern bool __tls_enabled; +extern unsigned __tls_index; + +void *__initialize_tls(char[hasatleast 64]); +void __install_tls(char[hasatleast 64]); +char *__get_tls(void); + +COSMOPOLITAN_C_END_ +#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ +#endif /* COSMOPOLITAN_LIBC_INTRIN_THREADED_H_ */ 
diff --git a/libc/intrin/threaded.internal.h b/libc/intrin/threaded.internal.h deleted file mode 100644 index 0dff79330..000000000 --- a/libc/intrin/threaded.internal.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_INTRIN_THREADED_INTERNAL_H_ -#define COSMOPOLITAN_LIBC_INTRIN_THREADED_INTERNAL_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -extern bool __hastls; -extern bool __threaded; - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_LIBC_INTRIN_THREADED_INTERNAL_H_ */ diff --git a/libc/intrin/tls.greg.c b/libc/intrin/tls.greg.c index d78503707..0c77758f3 100644 --- a/libc/intrin/tls.greg.c +++ b/libc/intrin/tls.greg.c @@ -17,76 +17,67 @@ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/assert.h" +#include "libc/calls/calls.h" #include "libc/dce.h" -#include "libc/intrin/tls.h" +#include "libc/errno.h" +#include "libc/intrin/threaded.h" +#include "libc/nt/thread.h" #include "libc/nt/thunk/msabi.h" +#include "libc/sysv/consts/nrlinux.h" -__msabi extern typeof(TlsFree) *const __imp_TlsFree; -__msabi extern typeof(TlsAlloc) *const __imp_TlsAlloc; -__msabi extern typeof(TlsGetValue) *const __imp_TlsGetValue; -__msabi extern typeof(TlsSetValue) *const __imp_TlsSetValue; +#define __NR_sysarch 0x000000a5 +#define __NR___set_tcb 0x00000149 +#define __NR__lwp_setprivate 0x0000013d +#define __NR_thread_fast_set_cthread_self 0x03000003 /** - * Assigns thread-local storage slot. - * - * This function may for instance be called at startup and the result - * can be assigned to a global static variable; from then on, all the - * threads in your application may pass that value to TlsGetValue, to - * retrieve their thread-local values. - * - * @return index on success, or -1u w/ errno - * @threadsafe + * Initializes thread information block. 
*/ -uint32_t TlsAlloc(void) { - return __imp_TlsAlloc(); +privileged void *__initialize_tls(char tib[hasatleast 64]) { + *(intptr_t *)tib = (intptr_t)tib; + *(intptr_t *)(tib + 0x30) = (intptr_t)tib; + *(int *)(tib + 0x3c) = __errno; + return tib; } /** - * Releases thread-local storage slot. - * @threadsafe + * Installs thread information block on main process. */ -bool32 TlsFree(uint32_t dwTlsIndex) { - return __imp_TlsFree(dwTlsIndex); -} - -/** - * Sets value to thread-local storage slot. - * - * @param dwTlsIndex is something returned by TlsAlloc() - * @return true if successful, otherwise false - * @threadsafe - */ -bool32 TlsSetValue(uint32_t dwTlsIndex, void *lpTlsValue) { - assert(IsWindows()); - if (dwTlsIndex < 64) { - asm("mov\t%1,%%gs:%0" - : "=m"(*((long *)0x1480 + dwTlsIndex)) - : "r"(lpTlsValue)); - return true; +privileged void __install_tls(char tib[hasatleast 64]) { + int ax, dx; + uint64_t magic; + unsigned char *p; + if (IsWindows()) { + if (!__tls_index) { + __tls_index = TlsAlloc(); + } + asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib)); + } else if (IsFreebsd()) { + asm volatile("syscall" + : "=a"(ax) + : "0"(__NR_sysarch), "D"(129), "S"(tib) + : "rcx", "r11", "memory", "cc"); + } else if (IsXnu()) { + asm volatile("syscall" + : "=a"(ax) + : "0"(__NR_thread_fast_set_cthread_self), + "D"((intptr_t)tib - 0x30) + : "rcx", "r11", "memory", "cc"); + } else if (IsOpenbsd()) { + asm volatile("syscall" + : "=a"(ax) + : "0"(__NR___set_tcb), "D"(tib) + : "rcx", "r11", "memory", "cc"); + } else if (IsNetbsd()) { + asm volatile("syscall" + : "=a"(ax), "=d"(dx) + : "0"(__NR__lwp_setprivate), "D"(tib) + : "rcx", "r11", "memory", "cc"); } else { - return __imp_TlsSetValue(dwTlsIndex, lpTlsValue); - } -} - -/** - * Retrieves value from thread-local storage slot. 
- * - * @param dwTlsIndex is something returned by TlsAlloc() - * @return true if successful, otherwise false - * @threadsafe - */ -void *TlsGetValue(uint32_t dwTlsIndex) { - void *lpTlsValue; - assert(IsWindows()); - if (dwTlsIndex < 64) { - asm("mov\t%%gs:%1,%0" - : "=r"(lpTlsValue) - : "m"(*((long *)0x1480 + dwTlsIndex))); - return lpTlsValue; - // // this could also be written as... - // asm("movq\t%%gs:0x30,%0" : "=a"(tib)); - // return (void *)tib[0x1480 / 8 + dwTlsIndex]; - } else { - return __imp_TlsGetValue(dwTlsIndex); + asm volatile("syscall" + : "=a"(ax) + : "0"(__NR_linux_arch_prctl), "D"(ARCH_SET_FS), "S"(tib) + : "rcx", "r11", "memory"); } + __tls_enabled = true; } diff --git a/libc/intrin/tls.h b/libc/intrin/tls.h deleted file mode 100644 index 8f539900d..000000000 --- a/libc/intrin/tls.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_INTRIN_TLS_H_ -#define COSMOPOLITAN_LIBC_INTRIN_TLS_H_ -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -uint32_t TlsAlloc(void); -bool32 TlsFree(uint32_t); -bool32 TlsSetValue(uint32_t, void *); -void *TlsGetValue(uint32_t); - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* COSMOPOLITAN_LIBC_INTRIN_TLS_H_ */ diff --git a/libc/intrin/winthread.internal.h b/libc/intrin/winthread.internal.h deleted file mode 100644 index 0ea54dbd1..000000000 --- a/libc/intrin/winthread.internal.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef COSMOPOLITAN_LIBC_RUNTIME_WINTHREAD_INTERNAL_H_ -#define COSMOPOLITAN_LIBC_RUNTIME_WINTHREAD_INTERNAL_H_ -#include "libc/intrin/tls.h" -#include "libc/runtime/runtime.h" -#if !(__ASSEMBLER__ + __LINKER__ + 0) -COSMOPOLITAN_C_START_ - -struct WinThread { - uint32_t tid; - int flags; - int *ctid; - int (*func)(void *); - void *arg; -}; - -extern int __winthread; - -static inline struct WinThread *GetWinThread(void) { - return TlsGetValue(__winthread); -} - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ -#endif /* 
COSMOPOLITAN_LIBC_RUNTIME_WINTHREAD_INTERNAL_H_ */ diff --git a/libc/log/vflogf.c b/libc/log/vflogf.c index 42766b264..0f07c5f6e 100644 --- a/libc/log/vflogf.c +++ b/libc/log/vflogf.c @@ -41,7 +41,7 @@ #define kNontrivialSize (8 * 1000 * 1000) static struct timespec vflogf_ts; -_Alignas(64) static char vflogf_lock; +_Alignas(64) static int vflogf_lock; /** * Takes corrective action if logging is on the fritz. diff --git a/libc/nexgen32e/hastls.c b/libc/nexgen32e/hastls.c deleted file mode 100644 index 84fad017e..000000000 --- a/libc/nexgen32e/hastls.c +++ /dev/null @@ -1,21 +0,0 @@ -/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ -│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ -╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright 2022 Justine Alexandra Roberts Tunney │ -│ │ -│ Permission to use, copy, modify, and/or distribute this software for │ -│ any purpose with or without fee is hereby granted, provided that the │ -│ above copyright notice and this permission notice appear in all copies. │ -│ │ -│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ -│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ -│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ -│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ -│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ -│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ -│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ -│ PERFORMANCE OF THIS SOFTWARE. 
│ -╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/threaded.internal.h" - -bool __hastls; diff --git a/libc/nexgen32e/nexgen32e.mk b/libc/nexgen32e/nexgen32e.mk index 5c6df13c2..a887887ce 100644 --- a/libc/nexgen32e/nexgen32e.mk +++ b/libc/nexgen32e/nexgen32e.mk @@ -42,7 +42,6 @@ $(LIBC_NEXGEN32E_A).pkg: \ $(LIBC_NEXGEN32E_A_OBJS) \ $(foreach x,$(LIBC_NEXGEN32E_A_DIRECTDEPS),$($(x)_A).pkg) -o/$(MODE)/libc/nexgen32e/hastls.o \ o/$(MODE)/libc/nexgen32e/threaded.o: \ OVERRIDE_CFLAGS += \ $(NO_MAGIC) \ diff --git a/libc/nexgen32e/threaded.c b/libc/nexgen32e/threaded.c index c8589830b..a9b37305f 100644 --- a/libc/nexgen32e/threaded.c +++ b/libc/nexgen32e/threaded.c @@ -16,6 +16,8 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/intrin/threaded.internal.h" +#include "libc/intrin/threaded.h" bool __threaded; +bool __tls_enabled; +unsigned __tls_index; diff --git a/libc/nt/kernel32/TlsAlloc.s b/libc/nt/kernel32/TlsAlloc.s index e39628b00..9a8311965 100644 --- a/libc/nt/kernel32/TlsAlloc.s +++ b/libc/nt/kernel32/TlsAlloc.s @@ -2,7 +2,7 @@ .imp kernel32,__imp_TlsAlloc,TlsAlloc,0 .text.windows -__TlsAlloc: +TlsAlloc: push %rbp mov %rsp,%rbp .profilable @@ -10,5 +10,5 @@ __TlsAlloc: call *__imp_TlsAlloc(%rip) leave ret - .endfn __TlsAlloc,globl + .endfn TlsAlloc,globl .previous diff --git a/libc/nt/kernel32/TlsFree.s b/libc/nt/kernel32/TlsFree.s index 8a66707ad..580308a34 100644 --- a/libc/nt/kernel32/TlsFree.s +++ b/libc/nt/kernel32/TlsFree.s @@ -2,7 +2,7 @@ .imp kernel32,__imp_TlsFree,TlsFree,0 .text.windows -__TlsFree: +TlsFree: push %rbp mov %rsp,%rbp .profilable @@ -11,5 +11,5 @@ __TlsFree: call *__imp_TlsFree(%rip) leave ret - .endfn __TlsFree,globl + .endfn TlsFree,globl .previous diff --git a/libc/nt/kernel32/TlsGetValue.s b/libc/nt/kernel32/TlsGetValue.s index 
371ed9084..b4c5fb727 100644 --- a/libc/nt/kernel32/TlsGetValue.s +++ b/libc/nt/kernel32/TlsGetValue.s @@ -2,7 +2,7 @@ .imp kernel32,__imp_TlsGetValue,TlsGetValue,0 .text.windows -__TlsGetValue: +TlsGetValue: push %rbp mov %rsp,%rbp .profilable @@ -11,5 +11,5 @@ __TlsGetValue: call *__imp_TlsGetValue(%rip) leave ret - .endfn __TlsGetValue,globl + .endfn TlsGetValue,globl .previous diff --git a/libc/nt/kernel32/TlsSetValue.s b/libc/nt/kernel32/TlsSetValue.s index 77d63bf4b..c53d538c5 100644 --- a/libc/nt/kernel32/TlsSetValue.s +++ b/libc/nt/kernel32/TlsSetValue.s @@ -2,11 +2,11 @@ .imp kernel32,__imp_TlsSetValue,TlsSetValue,0 .text.windows -__TlsSetValue: +TlsSetValue: push %rbp mov %rsp,%rbp .profilable mov __imp_TlsSetValue(%rip),%rax jmp __sysv2nt - .endfn __TlsSetValue,globl + .endfn TlsSetValue,globl .previous diff --git a/libc/nt/master.sh b/libc/nt/master.sh index caefb97e5..091076566 100755 --- a/libc/nt/master.sh +++ b/libc/nt/master.sh @@ -302,8 +302,8 @@ imp 'EnumerateLocalComputerNamesA' EnumerateLocalComputerNamesA kernel32 3 imp 'EraseTape' EraseTape kernel32 352 imp 'EscapeCommFunction' EscapeCommFunction kernel32 0 imp 'ExecuteUmsThread' ExecuteUmsThread kernel32 354 -imp 'ExitThread' ExitThread kernel32 0 1 imp 'ExitProcess' ExitProcess kernel32 0 1 # a.k.a. 
RtlExitUserProcess +imp 'ExitThread' ExitThread kernel32 0 1 imp 'ExitVDM' ExitVDM kernel32 357 imp 'ExpandEnvironmentStrings' ExpandEnvironmentStringsW kernel32 0 imp 'ExpandEnvironmentStringsA' ExpandEnvironmentStringsA kernel32 0 @@ -1225,6 +1225,10 @@ imp 'TermsrvSetValueKey' TermsrvSetValueKey kernel32 1441 imp 'TermsrvSyncUserIniFileExt' TermsrvSyncUserIniFileExt kernel32 1442 imp 'Thread32First' Thread32First kernel32 1443 imp 'Thread32Next' Thread32Next kernel32 1444 +imp 'TlsAlloc' TlsAlloc kernel32 0 0 +imp 'TlsFree' TlsFree kernel32 0 1 +imp 'TlsGetValue' TlsGetValue kernel32 0 1 +imp 'TlsSetValue' TlsSetValue kernel32 0 2 imp 'Toolhelp32ReadProcessMemory' Toolhelp32ReadProcessMemory kernel32 1449 imp 'TransactNamedPipe' TransactNamedPipe kernel32 0 7 imp 'TransmitCommChar' TransmitCommChar kernel32 0 @@ -1364,10 +1368,6 @@ imp '__ReOpenFile' ReOpenFile kernel32 0 4 # TODO(jart): 6.2 and highe imp '__RemoveDirectory' RemoveDirectoryW kernel32 0 1 imp '__SetCurrentDirectory' SetCurrentDirectoryW kernel32 0 1 imp '__TerminateProcess' TerminateProcess kernel32 0 2 -imp '__TlsAlloc' TlsAlloc kernel32 0 0 -imp '__TlsFree' TlsFree kernel32 0 1 -imp '__TlsGetValue' TlsGetValue kernel32 0 1 -imp '__TlsSetValue' TlsSetValue kernel32 0 2 imp '__UnmapViewOfFile' UnmapViewOfFile kernel32 0 1 imp '__VirtualProtect' VirtualProtect kernel32 0 4 imp '__WaitForMultipleObjects' WaitForMultipleObjects kernel32 0 4 diff --git a/libc/nt/thread.h b/libc/nt/thread.h index d5a8c3548..72f836b2c 100644 --- a/libc/nt/thread.h +++ b/libc/nt/thread.h @@ -57,6 +57,11 @@ bool32 CancelSynchronousIo(int64_t hThread); bool32 CancelIo(int64_t hFile); bool32 CancelIoEx(int64_t hFile, struct NtOverlapped *opt_lpOverlapped); +uint32_t TlsAlloc(void); +bool32 TlsFree(uint32_t); +bool32 TlsSetValue(uint32_t, void *); +void *TlsGetValue(uint32_t); + #if ShouldUseMsabiAttribute() #include "libc/nt/thunk/thread.inc" #endif /* ShouldUseMsabiAttribute() */ diff --git a/libc/runtime/clone.greg.c 
b/libc/runtime/clone.greg.c index 990f5960f..d6c6182aa 100644 --- a/libc/runtime/clone.greg.c +++ b/libc/runtime/clone.greg.c @@ -26,9 +26,7 @@ #include "libc/intrin/asan.internal.h" #include "libc/intrin/kprintf.h" #include "libc/intrin/spinlock.h" -#include "libc/intrin/threaded.internal.h" -#include "libc/intrin/tls.h" -#include "libc/intrin/winthread.internal.h" +#include "libc/intrin/threaded.h" #include "libc/nt/runtime.h" #include "libc/nt/thread.h" #include "libc/nt/thunk/msabi.h" @@ -46,58 +44,22 @@ STATIC_YOINK("gettid"); // for kprintf() #define __NR_clone_linux 56 #define __NR__lwp_create 309 #define __NR_getcontext_netbsd 307 -#define __NR__lwp_setprivate 317 #define __NR_bsdthread_create 0x02000168 #define __NR_thread_fast_set_cthread_self 0x03000003 -#define __NR_sysarch 0x000000a5 -#define __NR___set_tcb 0x00000149 #define PTHREAD_START_CUSTOM_XNU 0x01000000 #define LWP_DETACHED 0x00000040 #define LWP_SUSPENDED 0x00000080 -char __tls[512]; -int __errno_global; -extern int __errno_index; +static char tibdefault[64]; -privileged void __setup_tls(void) { - int ax, dx; - uint64_t magic; - unsigned char *p; - *(intptr_t *)__tls = (intptr_t)__tls; - *(intptr_t *)(__tls + 0x30) = (intptr_t)__tls; - *(int *)(__tls + 0x3c) = __errno; - if (IsWindows()) { - __errno_index = TlsAlloc(); - TlsSetValue(__errno_index, (void *)(intptr_t)__errno); - } else if (IsLinux()) { - asm volatile("syscall" - : "=a"(ax) - : "0"(__NR_linux_arch_prctl), "D"(ARCH_SET_FS), "S"(__tls) - : "rcx", "r11", "memory"); - } else if (IsFreebsd()) { - asm volatile("syscall" - : "=a"(ax) - : "0"(__NR_sysarch), "D"(129), "S"(__tls) - : "rcx", "r11", "memory", "cc"); - } else if (IsXnu()) { - asm volatile("syscall" - : "=a"(ax) - : "0"(__NR_thread_fast_set_cthread_self), - "D"((intptr_t)__tls - 0x30) - : "rcx", "r11", "memory", "cc"); - } else if (IsOpenbsd()) { - asm volatile("syscall" - : "=a"(ax) - : "0"(__NR___set_tcb), "D"(__tls) - : "rcx", "r11", "memory", "cc"); - } else if 
(IsNetbsd()) { - asm volatile("syscall" - : "=a"(ax), "=d"(dx) - : "0"(__NR__lwp_setprivate), "D"(__tls) - : "rcx", "r11", "memory", "cc"); - } - __hastls = true; -} +struct WinThread { + uint32_t tid; + int flags; + int *ctid; + void *tls; + int (*func)(void *); + void *arg; +}; uint32_t WinThreadThunk(void *warg); asm(".section\t.text.windows,\"ax\",@progbits\n\t" @@ -115,20 +77,19 @@ __attribute__((__used__, __no_reorder__)) static textwindows wontreturn void WinThreadMain(struct WinThread *wt) { int rc; + if (wt->flags & CLONE_SETTLS) { + TlsSetValue(__tls_index, wt->tls); + } if (wt->flags & CLONE_CHILD_SETTID) { *wt->ctid = wt->tid; } rc = wt->func(wt->arg); - if (wt->flags & CLONE_CHILD_CLEARTID) { - *wt->ctid = 0; - } _Exit1(rc); } static textwindows int CloneWindows(int (*func)(void *), char *stk, size_t stksz, int flags, void *arg, - int *ptid, void *tls, size_t tlssz, - int *ctid) { + void *tls, size_t tlssz, int *ctid) { int64_t h; struct WinThread *wt; wt = (struct WinThread *)(((intptr_t)(stk + stksz) - @@ -138,14 +99,11 @@ static textwindows int CloneWindows(int (*func)(void *), char *stk, wt->ctid = ctid; wt->func = func; wt->arg = arg; + wt->tls = tls; if ((h = CreateThread(0, 0, WinThreadThunk, wt, 0, &wt->tid))) { CloseHandle(h); - if (flags & CLONE_PARENT_SETTID) { - *ptid = wt->tid; - } return wt->tid; } else { - __releasefd(wt->tid); return -1; } } @@ -179,14 +137,11 @@ XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg, *(int *)sp[2] = tid; } rc = func(arg); - if (sp[4] & CLONE_CHILD_CLEARTID) { - *(int *)sp[2] = 0; - } _Exit1(rc); } static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags, - void *arg, int *ptid, void *tls, size_t tlssz, int *ctid) { + void *arg, void *tls, size_t tlssz, int *ctid) { int rc; bool failed; intptr_t *sp; @@ -212,9 +167,6 @@ static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags, _seizelock(sp); // TODO: How can we get the tid without locking? 
if ((rc = bsdthread_create(fn, arg, sp, 0, PTHREAD_START_CUSTOM_XNU)) != -1) { _spinlock(sp); - if (flags & CLONE_PARENT_SETTID) { - *ptid = sp[1]; - } rc = sp[1]; } return rc; @@ -236,15 +188,11 @@ FreebsdThreadMain(intptr_t *sp) { *(int *)sp[2] = sp[4]; } rc = ((int (*)(intptr_t))sp[0])(sp[1]); - if (sp[3] & CLONE_CHILD_CLEARTID) { - *(int *)sp[2] = 0; - } _Exit1(rc); } static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags, - void *arg, int *ptid, void *tls, size_t tlssz, - int *ctid) { + void *arg, void *tls, size_t tlssz, int *ctid) { int ax; bool failed; int64_t tid; @@ -270,15 +218,11 @@ static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags, : CFLAG_CONSTRAINT(failed), "=a"(ax) : "1"(__NR_thr_new), "D"(¶ms), "S"(sizeof(params)) : "rcx", "rdx", "r8", "r9", "r10", "r11", "memory"); - if (!failed) { - if (flags & CLONE_PARENT_SETTID) { - *ptid = tid; - } - return tid; - } else { + if (failed) { errno = ax; - return -1; + tid = -1; } + return tid; } struct __tfork { @@ -313,15 +257,11 @@ static privileged wontreturn void OpenbsdThreadMain(intptr_t *sp) { int rc; rc = ((int (*)(intptr_t))sp[0])(sp[1]); - if (sp[3] & CLONE_CHILD_CLEARTID) { - *(int *)sp[2] = 0; - } _Exit1(rc); } static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags, - void *arg, int *ptid, void *tls, size_t tlssz, - int *ctid) { + void *arg, void *tls, size_t tlssz, int *ctid) { int tid; intptr_t *sp; struct __tfork params; @@ -333,11 +273,7 @@ static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags, params.tf_stack = sp; params.tf_tcb = flags & CLONE_SETTLS ? tls : 0; params.tf_tid = flags & CLONE_CHILD_SETTID ? 
ctid : 0; - if ((tid = __tfork(¶ms, sizeof(params), sp)) > 0) { - if (flags & CLONE_PARENT_SETTID) { - *ptid = tid; - } - } else { + if ((tid = __tfork(¶ms, sizeof(params), sp)) < 0) { errno = -tid; tid = -1; } @@ -351,15 +287,11 @@ static wontreturn void NetbsdThreadMain(void *arg, int (*func)(void *arg), *ctid = *tid; } rc = func(arg); - if (flags & CLONE_CHILD_CLEARTID) { - *ctid = 0; - } _Exit1(rc); } static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags, - void *arg, int *ptid, void *tls, size_t tlssz, - int *ctid) { + void *arg, void *tls, size_t tlssz, int *ctid) { // NetBSD has its own clone() and it works, but it's technically a // second-class API, intended to help Linux folks migrate to this! // We put it on the thread's stack, to avoid locking this function @@ -414,9 +346,6 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags, : "1"(__NR__lwp_create), "D"(ctx), "S"(LWP_DETACHED), "2"(tid) : "rcx", "r11", "memory"); if (!failed) { - if (flags & CLONE_PARENT_SETTID) { - *ptid = *tid; - } return *tid; } else { errno = ax; @@ -453,49 +382,65 @@ static int CloneLinux(int (*func)(void *), char *stk, size_t stksz, int flags, * Creates thread. * * Threads are created in a detached manner. They currently can't be - * synchronized using wait() and posix signals. Threads created by this + * synchronized using wait() or posix signals. Threads created by this * function should be synchronized using shared memory operations. * * Any memory that's required by this system call wrapper is allocated * to the top of your stack. This is normally about 64 bytes, although * on NetBSD it's currently 800. * + * Your function is called from within the stack you specify. A return + * address is pushed onto your stack, that causes returning to jump to + * _Exit1() which terminates the thread. Even though the callback says + * it supports a return code, that'll only work on Linux and Windows. 
+ * + * The `tls` parameter is for thread-local storage. If you specify this + * then clone() will implicitly rewire libc (e.g. errno) to use TLS: + * + * static char tib[64]; + * __initialize_tls(tib); + * __install_tls(tib); + * + * If you want a main process TLS size that's larger, call them manually. + * Once you've done the above and/or started creating your own threads + * you'll be able to access your `tls` thread information block, using + * + * char *p = __get_tls(); + * printf("errno is %d\n", *(int *)(p + 0x3c)); + * * This function follows the same ABI convention as the Linux userspace * libraries, with a few small changes. The varargs has been removed to * help prevent broken code, and the stack size and tls size parameters * are introduced for compatibility with FreeBSD. * + * To keep this system call lightweight, only the thread creation use + * case is polyfilled across platforms. For example, if you want fork + * that works on OpenBSD, don't do it with clone(SIGCHLD) + * and please just call fork(). Even if you do that on Linux, it will + * effectively work around libc features like atfork(), so that means + * other calls like getpid() may return incorrect values. 
+ * * @param func is your callback function * @param stk points to the bottom of a caller allocated stack, which - * must be null when fork() and vfork() equivalent flags are used - * and furthermore this must be mmap()'d using MAP_STACK in order - * to work on OpenBSD - * @param stksz is the size of that stack in bytes which must be zero - * if the fork() or vfork() equivalent flags are used it's highly - * recommended that this value be GetStackSize(), or else kprintf - * and other runtime services providing memory safety can't do as - * good and quick of a job; this value must be 16-aligned plus it - * must be at minimum 4096 bytes in size - * @param flags usually has one of - * - `SIGCHLD` will delegate to fork() - * - `CLONE_VFORK|CLONE_VM|SIGCHLD` means vfork() + * must be allocated via mmap() using the MAP_STACK flag, or else + * you won't get optimal performance and it won't work on OpenBSD + * @param stksz is the size of that stack in bytes, we recommend + * that this be set to GetStackSize() or else memory safety tools + * like kprintf() can't do as good and quick of a job; this value + * must be 16-aligned plus it must be at least 4192 bytes in size + * and it's advised to have the bottom-most page be a guard page + * @param flags should have: * - `CLONE_THREAD|CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND` - * as part high bytes, and the low order byte may optionally contain - * a signal e.g. 
SIGCHLD, to enable parent notification on terminate - * although the signal isn't supported on non-Linux and non-NetBSD - * at the moment; 'flags' may optionally bitwise or the following: - * - `CLONE_PARENT_SETTID` is needed for `ctid` should be set - * - `CLONE_CHILD_SETTID` is needed for `ptid` should be set - * - `CLONE_SETTLS` is needed to set `%fs` segment to `tls` + * and may optionally bitwise any of the following: + * - `CLONE_CHILD_SETTID` is needed too if you use `ctid` + * - `CLONE_SETTLS` is needed too if you set `tls` * @param arg will be passed to your callback - * @param ptid lets the parent receive the child thread id; - * this parameter is ignored if `CLONE_PARENT_SETTID` is not set * @param tls may be used to set the thread local storage segment; * this parameter is ignored if `CLONE_SETTLS` is not set - * @param tlssz is the size of tls in bytes - * @param ctid lets the child receive its thread id; - * this parameter is ignored if `CLONE_CHILD_SETTID` is not set - * @return tid on success and 0 to the child, or -1 w/ errno + * @param tlssz is the size of tls in bytes which must be at least 64 + * @param ctid lets the child receive its thread id without having to + * call gettid() and is ignored if `CLONE_CHILD_SETTID` isn't set + * @return tid of child on success, or -1 w/ errno * @threadsafe */ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg, @@ -503,11 +448,11 @@ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg, int rc; __threaded = true; - if (tls && !__hastls) { - __setup_tls(); + if (tls && !__tls_enabled) { + __initialize_tls(tibdefault); + __install_tls(tibdefault); } - // verify memory is kosher if (IsAsan() && ((stksz > PAGESIZE && !__asan_is_valid((char *)stk + PAGESIZE, stksz - PAGESIZE)) || @@ -518,54 +463,27 @@ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg, ((flags & CLONE_CHILD_SETTID) && !__asan_is_valid(ctid, sizeof(*ctid))))) { rc = efault(); 
- } - - // delegate to bona fide clone() - else if (IsLinux()) { + } else if (!IsTiny() && + (((flags & CLONE_VM) && (stksz < PAGESIZE || (stksz & 15))) || + ((flags & CLONE_SETTLS) && (tlssz < 64 || (tlssz & 7))))) { + rc = einval(); + } else if (IsLinux()) { rc = CloneLinux(func, stk, stksz, flags, arg, ptid, tls, tlssz, ctid); - } - - // polyfill fork() and vfork() use cases on platforms without clone() - else if ((SupportsWindows() || SupportsBsd()) && - flags == (CLONE_VFORK | CLONE_VM | SIGCHLD)) { - if (IsTiny()) { - rc = einval(); - } else if (!arg && !stksz) { - return vfork(); // don't log clone() - } else { - rc = einval(); - } - } else if ((SupportsWindows() || SupportsBsd()) && flags == SIGCHLD) { - if (IsTiny()) { - rc = eopnotsupp(); - } else if (!arg && !stksz) { - return fork(); // don't log clone() - } else { - rc = einval(); - } - } - - // we now assume we're creating a thread - // these platforms can't do signals the way linux does - else if (!IsTiny() && ((stksz < PAGESIZE || (stksz & 15)) || - (flags & ~(CLONE_SETTLS | CLONE_PARENT_SETTID | - CLONE_CHILD_SETTID)) != - (CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND))) { + } else if (!IsTiny() && (flags & ~(CLONE_SETTLS | CLONE_PARENT_SETTID | + CLONE_CHILD_SETTID)) != + (CLONE_THREAD | CLONE_VM | CLONE_FS | + CLONE_FILES | CLONE_SIGHAND)) { rc = einval(); } else if (IsXnu()) { - rc = CloneXnu(func, stk, stksz, flags, arg, ptid, tls, tlssz, ctid); + rc = CloneXnu(func, stk, stksz, flags, arg, tls, tlssz, ctid); } else if (IsFreebsd()) { - rc = CloneFreebsd(func, stk, stksz, flags, arg, ptid, tls, tlssz, ctid); + rc = CloneFreebsd(func, stk, stksz, flags, arg, tls, tlssz, ctid); } else if (IsNetbsd()) { - rc = CloneNetbsd(func, stk, stksz, flags, arg, ptid, tls, tlssz, ctid); + rc = CloneNetbsd(func, stk, stksz, flags, arg, tls, tlssz, ctid); } else if (IsOpenbsd()) { - rc = CloneOpenbsd(func, stk, stksz, flags, arg, ptid, tls, tlssz, ctid); - } - - // These platforms can't 
do segment registers like linux does - else if (IsWindows()) { - rc = CloneWindows(func, stk, stksz, flags, arg, ptid, tls, tlssz, ctid); + rc = CloneOpenbsd(func, stk, stksz, flags, arg, tls, tlssz, ctid); + } else if (IsWindows()) { + rc = CloneWindows(func, stk, stksz, flags, arg, tls, tlssz, ctid); } else { rc = enosys(); } diff --git a/libc/runtime/getsymboltable.greg.c b/libc/runtime/getsymboltable.greg.c index 60af4749a..38e54f0d4 100644 --- a/libc/runtime/getsymboltable.greg.c +++ b/libc/runtime/getsymboltable.greg.c @@ -20,6 +20,7 @@ #include "libc/bits/bits.h" #include "libc/bits/weaken.h" #include "libc/calls/strace.internal.h" +#include "libc/intrin/spinlock.h" #include "libc/macros.internal.h" #include "libc/runtime/runtime.h" #include "libc/runtime/symbols.internal.h" @@ -29,6 +30,7 @@ #include "libc/zip.h" #include "libc/zipos/zipos.internal.h" +static char g_lock; static struct SymbolTable *g_symtab; /** @@ -118,6 +120,7 @@ static struct SymbolTable *GetSymbolTableFromElf(void) { */ struct SymbolTable *GetSymbolTable(void) { struct Zipos *z; + if (_trylock(&g_lock)) return 0; if (!g_symtab && !__isworker) { if (weaken(__zipos_get) && (z = weaken(__zipos_get)())) { if ((g_symtab = GetSymbolTableFromZip(z))) { @@ -131,6 +134,7 @@ struct SymbolTable *GetSymbolTable(void) { g_symtab = GetSymbolTableFromElf(); } } + _spunlock(&g_lock); return g_symtab; } diff --git a/libc/runtime/memtrack.internal.h b/libc/runtime/memtrack.internal.h index ce551b24b..97a38801d 100644 --- a/libc/runtime/memtrack.internal.h +++ b/libc/runtime/memtrack.internal.h @@ -46,7 +46,7 @@ struct MemoryIntervals { size_t i, n; struct MemoryInterval *p; struct MemoryInterval s[OPEN_MAX]; - _Alignas(64) char lock; + _Alignas(64) int lock; }; extern hidden struct MemoryIntervals _mmi; diff --git a/libc/sock/closesocket-nt.c b/libc/sock/closesocket-nt.c index baf2352ef..fc7232c54 100644 --- a/libc/sock/closesocket-nt.c +++ b/libc/sock/closesocket-nt.c @@ -16,6 +16,9 @@ │ TORTIOUS 
ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/bits/weaken.h" +#include "libc/intrin/kprintf.h" +#include "libc/log/backtrace.internal.h" #include "libc/mem/mem.h" #include "libc/sock/internal.h" diff --git a/libc/sock/kntwsadata.c b/libc/sock/kntwsadata.c index 133dffe3c..32950b2da 100644 --- a/libc/sock/kntwsadata.c +++ b/libc/sock/kntwsadata.c @@ -20,6 +20,7 @@ #include "libc/calls/calls.h" #include "libc/calls/strace.internal.h" #include "libc/dce.h" +#include "libc/intrin/spinlock.h" #include "libc/mem/mem.h" #include "libc/nt/runtime.h" #include "libc/nt/winsock.h" @@ -40,12 +41,6 @@ hidden struct NtWsaData kNtWsaData; static textwindows void WinSockCleanup(void) { int i, rc; NTTRACE("WinSockCleanup()"); - for (i = g_fds.n; i--;) { - if (g_fds.p[i].kind == kFdSocket) { - close(i); - } - } - // TODO(jart): Check WSACleanup() result code rc = WSACleanup(); NTTRACE("WSACleanup() → %d% lm", rc); } diff --git a/libc/sock/socket-nt.c b/libc/sock/socket-nt.c index e919f017a..913b66e14 100644 --- a/libc/sock/socket-nt.c +++ b/libc/sock/socket-nt.c @@ -17,6 +17,7 @@ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/internal.h" +#include "libc/intrin/spinlock.h" #include "libc/mem/mem.h" #include "libc/nt/enum/fileflagandattributes.h" #include "libc/nt/iphlpapi.h" @@ -61,11 +62,13 @@ textwindows int sys_socket_nt(int family, int type, int protocol) { sockfd->family = family; sockfd->type = truetype; sockfd->protocol = protocol; + _spinlock(&__fds_lock); g_fds.p[fd].kind = kFdSocket; g_fds.p[fd].flags = oflags; g_fds.p[fd].mode = 0140666; g_fds.p[fd].handle = h; g_fds.p[fd].extra = (uintptr_t)sockfd; + _spunlock(&__fds_lock); return fd; } else { __releasefd(fd); diff --git a/libc/sock/socketpair-nt.c b/libc/sock/socketpair-nt.c index fb71e728f..44a63c7b9 100644 --- a/libc/sock/socketpair-nt.c +++ b/libc/sock/socketpair-nt.c @@ -16,6 +16,7 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/intrin/spinlock.h" #include "libc/nt/createfile.h" #include "libc/nt/enum/accessmask.h" #include "libc/nt/enum/creationdisposition.h" @@ -74,6 +75,8 @@ textwindows int sys_socketpair_nt(int family, int type, int proto, int sv[2]) { return -1; } + _spinlock(&__fds_lock); + g_fds.p[reader].kind = kFdFile; g_fds.p[reader].flags = oflags; g_fds.p[reader].mode = 0140444; @@ -84,6 +87,8 @@ textwindows int sys_socketpair_nt(int family, int type, int proto, int sv[2]) { g_fds.p[writer].mode = 0140222; g_fds.p[writer].handle = h1; + _spunlock(&__fds_lock); + sv[0] = reader; sv[1] = writer; return 0; diff --git a/libc/stdio/fflush.internal.h b/libc/stdio/fflush.internal.h index 1dd682498..39fbe5e25 100644 --- a/libc/stdio/fflush.internal.h +++ b/libc/stdio/fflush.internal.h @@ -10,7 +10,7 @@ struct StdioFlushHandles { }; struct StdioFlush { - char lock; + int lock; struct StdioFlushHandles handles; FILE *handles_initmem[8]; }; diff --git 
a/libc/stdio/stdio.h b/libc/stdio/stdio.h index 6c0914db8..2eccd28a6 100644 --- a/libc/stdio/stdio.h +++ b/libc/stdio/stdio.h @@ -24,7 +24,7 @@ typedef struct FILE { uint32_t size; /* 0x20 */ uint32_t nofree; /* 0x24 */ int pid; /* 0x28 */ - char lock; /* 0x2c */ + int lock; /* 0x2c */ char *getln; /* 0x30 */ } FILE; diff --git a/libc/sysv/errno.greg.c b/libc/sysv/errno.greg.c index c0476db59..bfd95d6ff 100644 --- a/libc/sysv/errno.greg.c +++ b/libc/sysv/errno.greg.c @@ -19,7 +19,8 @@ #include "libc/bits/weaken.h" #include "libc/calls/calls.h" #include "libc/dce.h" -#include "libc/intrin/threaded.internal.h" +#include "libc/intrin/threaded.h" +#include "libc/nt/thread.h" /** * Global variable for last error. @@ -34,22 +35,29 @@ * @see __errno_location() stable abi */ errno_t __errno; -int __errno_index; -privileged nocallersavedregisters errno_t *(__errno_location)(void) { - char *tib; - if (!__hastls) { - return &__errno; - } else if (IsLinux() || IsFreebsd() || IsNetbsd() || IsOpenbsd()) { - asm("mov\t%%fs:0,%0" : "=a"(tib)); - return (errno_t *)(tib + 0x3c); - } else if (IsXnu()) { - asm("mov\t%%gs:0x30,%0" : "=a"(tib)); - return (errno_t *)(tib + 0x3c); - } else if (IsWindows()) { - asm("mov\t%%gs:0x30,%0" : "=a"(tib)); - return (errno_t *)(tib + 0x1480 + __errno_index * 8); +/** + * Returns address of thread information block. + * @see __install_tls() + * @see clone() + */ +privileged nocallersavedregisters char *__get_tls(void) { + char *tib, *linear = (char *)0x30; + if (IsLinux() || IsFreebsd() || IsNetbsd() || IsOpenbsd()) { + asm("mov\t%%fs:(%1),%0" : "=a"(tib) : "r"(linear)); } else { - return &__errno; + asm("mov\t%%gs:(%1),%0" : "=a"(tib) : "r"(linear)); + if (IsWindows()) tib = *(char **)(tib + 0x1480 + __tls_index * 8); } + return tib; +} + +/** + * Returns address of errno variable. 
+ * @see __initialize_tls() + * @see __install_tls() + */ +privileged nocallersavedregisters errno_t *(__errno_location)(void) { + if (!__tls_enabled) return &__errno; + return (errno_t *)(__get_tls() + 0x3c); } diff --git a/libc/testlib/testmain.c b/libc/testlib/testmain.c index 2893e06cf..caa89a114 100644 --- a/libc/testlib/testmain.c +++ b/libc/testlib/testmain.c @@ -153,6 +153,7 @@ noasan int main(int argc, char *argv[]) { __log_level = kLogInfo; GetOpts(argc, argv); setenv("GDB", "", true); + GetSymbolTable(); // normalize this process FixIrregularFds(); diff --git a/libc/time/localtime.c b/libc/time/localtime.c index dcede6842..db409027a 100644 --- a/libc/time/localtime.c +++ b/libc/time/localtime.c @@ -42,7 +42,7 @@ STATIC_YOINK("usr/share/zoneinfo/UTC"); ** POSIX-style TZ environment variable handling from Guy Harris. */ -_Alignas(64) static char locallock; +_Alignas(64) static int locallock; static int lock(void) { _spinlock(&locallock); diff --git a/libc/intrin/winthread.c b/test/libc/intrin/tls_test.c similarity index 81% rename from libc/intrin/winthread.c rename to test/libc/intrin/tls_test.c index 89c193fbe..afefb2246 100644 --- a/libc/intrin/winthread.c +++ b/test/libc/intrin/tls_test.c @@ -16,23 +16,20 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/dce.h" -#include "libc/intrin/tls.h" -#include "libc/intrin/winthread.internal.h" +#include "libc/errno.h" +#include "libc/intrin/threaded.h" +#include "libc/runtime/runtime.h" +#include "libc/testlib/testlib.h" -/** - * @fileoverview TLS slot for clone() win32 polyfill. 
- */ +static char tib[64]; -int __winthread; - -static textstartup void __winthread_init(void) { - if (IsWindows()) { - __winthread = TlsAlloc(); - TlsSetValue(__winthread, 0); - } +TEST(tls, test) { + errno = 31337; + EXPECT_EQ(31337, errno); + EXPECT_EQ(&__errno, __errno_location()); + __initialize_tls(tib); + __install_tls(tib); + EXPECT_EQ(31337, errno); + EXPECT_EQ(tib, __get_tls()); + EXPECT_EQ(tib + 0x3c, (char *)__errno_location()); } - -const void *const __winthread_ctor[] initarray = { - __winthread_init, -}; diff --git a/test/libc/rand/rand64_test.c b/test/libc/rand/rand64_test.c index 4957f91ae..face313f4 100644 --- a/test/libc/rand/rand64_test.c +++ b/test/libc/rand/rand64_test.c @@ -23,6 +23,7 @@ #include "libc/dce.h" #include "libc/errno.h" #include "libc/intrin/spinlock.h" +#include "libc/intrin/threaded.h" #include "libc/macros.internal.h" #include "libc/rand/rand.h" #include "libc/runtime/stack.h" @@ -74,6 +75,7 @@ TEST(rand64, testLcg_doesntProduceIdenticalValues) { } TEST(rand64, testThreadSafety_doesntProduceIdenticalValues) { + char *tls[THREADS]; sigset_t ss, oldss; void *stacks[THREADS]; int i, j, rc, ws, tid[THREADS]; @@ -89,12 +91,14 @@ TEST(rand64, testThreadSafety_doesntProduceIdenticalValues) { } ready = false; for (i = 0; i < THREADS; ++i) { + tls[i] = calloc(1, 64); + __initialize_tls(tls[i]); stacks[i] = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, MAP_STACK | MAP_ANONYMOUS, -1, 0); tid[i] = clone(Thrasher, stacks[i], GetStackSize(), CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND, - (void *)(intptr_t)i, 0, 0, 0, 0); + (void *)(intptr_t)i, 0, tls[i], 64, 0); ASSERT_NE(-1, tid[i]); } ready = true; @@ -113,4 +117,7 @@ TEST(rand64, testThreadSafety_doesntProduceIdenticalValues) { for (i = 0; i < THREADS; ++i) { EXPECT_SYS(0, 0, munmap(stacks[i], GetStackSize())); } + for (i = 0; i < THREADS; ++i) { + free(tls[i]); + } } diff --git a/test/libc/runtime/clone_test.c b/test/libc/runtime/clone_test.c index 
1db449e66..c686b50d2 100644 --- a/test/libc/runtime/clone_test.c +++ b/test/libc/runtime/clone_test.c @@ -21,6 +21,7 @@ #include "libc/errno.h" #include "libc/intrin/kprintf.h" #include "libc/intrin/spinlock.h" +#include "libc/intrin/threaded.h" #include "libc/mem/mem.h" #include "libc/runtime/stack.h" #include "libc/sysv/consts/clone.h" @@ -31,7 +32,7 @@ #include "libc/time/time.h" char *stack, *tls; -int x, me, thechilde; +int x, me, tid, thechilde; _Alignas(64) volatile char lock; void SetUp(void) { @@ -39,9 +40,8 @@ void SetUp(void) { lock = 0; me = gettid(); thechilde = 0; - tls = calloc(1, 512); - *(intptr_t *)tls = (intptr_t)tls; - *(intptr_t *)(tls + 0x30) = (intptr_t)tls; + tls = calloc(1, 64); + __initialize_tls(tls); *(int *)(tls + 0x3c) = 31337; ASSERT_NE(MAP_FAILED, (stack = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, MAP_STACK | MAP_ANONYMOUS, -1, 0))); @@ -75,15 +75,12 @@ int DoNothing(void *arg) { return 0; } -void __setup_tls(void); - TEST(clone, test1) { - int tid; _spinlock(&lock); ASSERT_NE(-1, (tid = clone(CloneTest1, stack, GetStackSize(), CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_SETTLS, - (void *)23, 0, tls, 512, 0))); + (void *)23, 0, tls, 64, 0))); _spinlock(&lock); ASSERT_EQ(42, x); ASSERT_NE(me, tid); @@ -91,13 +88,27 @@ TEST(clone, test1) { ASSERT_EQ(0, errno); errno = 31337; ASSERT_EQ(31337, errno); - - return; - intptr_t *p; - asm("movq\t%%fs:0x30,%0" : "=a"(p)); - kprintf("%fs:0x30 = %p\n", p); - for (int i = 0; i < 64; ++i) { - kprintf("0x%.5x = %p\n", i * 8, p[i]); - } - kprintf("\n"); + errno = 0; +} + +int CloneTestSys(void *arg) { + thechilde = gettid(); + ASSERT_EQ(31337, errno); + open(0, 0); + ASSERT_EQ(EFAULT, errno); + _spunlock(&lock); + return 0; +} + +TEST(clone, tlsSystemCallsErrno_wontClobberMainThreadBecauseTls) { + ASSERT_EQ(0, errno); + ASSERT_EQ(31337, *(int *)(tls + 0x3c)); + _spinlock(&lock); + ASSERT_NE(-1, (tid = clone(CloneTestSys, stack, GetStackSize(), + CLONE_THREAD | 
CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_SETTLS, + (void *)23, 0, tls, 64, 0))); + _spinlock(&lock); + ASSERT_EQ(0, errno); + ASSERT_EQ(EFAULT, *(int *)(tls + 0x3c)); } diff --git a/third_party/chibicc/as.c b/third_party/chibicc/as.c index 42883b804..17a21cdb0 100644 --- a/third_party/chibicc/as.c +++ b/third_party/chibicc/as.c @@ -2921,6 +2921,7 @@ static void OnMaxpd(struct As *a, struct Slice s) { OpSse(a, 0x660F5F); } static void OnMaxps(struct As *a, struct Slice s) { OpSse(a, 0x0F5F); } static void OnMaxsd(struct As *a, struct Slice s) { OpSse(a, 0xF20F5F); } static void OnMaxss(struct As *a, struct Slice s) { OpSse(a, 0xF30F5F); } +static void OnMfence(struct As *a, struct Slice s) { EmitVarword(a, 0x0faef0); } static void OnMinpd(struct As *a, struct Slice s) { OpSse(a, 0x660F5D); } static void OnMinps(struct As *a, struct Slice s) { OpSse(a, 0x0F5D); } static void OnMinsd(struct As *a, struct Slice s) { OpSse(a, 0xF20F5D); } @@ -3370,6 +3371,7 @@ static const struct Directive8 { {"maxps", OnMaxps}, // {"maxsd", OnMaxsd}, // {"maxss", OnMaxss}, // + {"mfence", OnMfence}, // {"minpd", OnMinpd}, // {"minps", OnMinps}, // {"minsd", OnMinsd}, // diff --git a/third_party/chibicc/chibicc.h b/third_party/chibicc/chibicc.h index 75eb581af..171f1c8fa 100644 --- a/third_party/chibicc/chibicc.h +++ b/third_party/chibicc/chibicc.h @@ -297,62 +297,65 @@ struct Relocation { }; typedef enum { - ND_NULL_EXPR, // Do nothing - ND_ADD, // + - ND_SUB, // - - ND_MUL, // * - ND_DIV, // / - ND_NEG, // unary - - ND_REM, // % - ND_BINAND, // & - ND_BINOR, // | - ND_BINXOR, // ^ - ND_SHL, // << - ND_SHR, // >> - ND_EQ, // == - ND_NE, // != - ND_LT, // < - ND_LE, // <= - ND_ASSIGN, // = - ND_COND, // ?: - ND_COMMA, // , - ND_MEMBER, // . (struct member access) - ND_ADDR, // unary & - ND_DEREF, // unary * - ND_NOT, // ! 
- ND_BITNOT, // ~ - ND_LOGAND, // && - ND_LOGOR, // || - ND_RETURN, // "return" - ND_IF, // "if" - ND_FOR, // "for" or "while" - ND_DO, // "do" - ND_SWITCH, // "switch" - ND_CASE, // "case" - ND_BLOCK, // { ... } - ND_GOTO, // "goto" - ND_GOTO_EXPR, // "goto" labels-as-values - ND_LABEL, // Labeled statement - ND_LABEL_VAL, // [GNU] Labels-as-values - ND_FUNCALL, // Function call - ND_EXPR_STMT, // Expression statement - ND_STMT_EXPR, // Statement expression - ND_VAR, // Variable - ND_VLA_PTR, // VLA designator - ND_NUM, // Integer - ND_CAST, // Type cast - ND_MEMZERO, // Zero-clear a stack variable - ND_ASM, // "asm" - ND_CAS, // Atomic compare-and-swap - ND_EXCH, // Atomic exchange - ND_LOAD, // Atomic load - ND_TESTANDSET, // Atomic lock test and set - ND_RELEASE, // Atomic lock release - ND_FETCHADD, // Atomic fetch and add - ND_SUBFETCH, // Atomic sub and fetch - ND_FPCLASSIFY, // floating point classify - ND_MOVNTDQ, // Intel MOVNTDQ - ND_PMOVMSKB, // Intel PMOVMSKB + ND_NULL_EXPR, // Do nothing + ND_ADD, // + + ND_SUB, // - + ND_MUL, // * + ND_DIV, // / + ND_NEG, // unary - + ND_REM, // % + ND_BINAND, // & + ND_BINOR, // | + ND_BINXOR, // ^ + ND_SHL, // << + ND_SHR, // >> + ND_EQ, // == + ND_NE, // != + ND_LT, // < + ND_LE, // <= + ND_ASSIGN, // = + ND_COND, // ?: + ND_COMMA, // , + ND_MEMBER, // . (struct member access) + ND_ADDR, // unary & + ND_DEREF, // unary * + ND_NOT, // ! + ND_BITNOT, // ~ + ND_LOGAND, // && + ND_LOGOR, // || + ND_RETURN, // "return" + ND_IF, // "if" + ND_FOR, // "for" or "while" + ND_DO, // "do" + ND_SWITCH, // "switch" + ND_CASE, // "case" + ND_BLOCK, // { ... 
} + ND_GOTO, // "goto" + ND_GOTO_EXPR, // "goto" labels-as-values + ND_LABEL, // Labeled statement + ND_LABEL_VAL, // [GNU] Labels-as-values + ND_FUNCALL, // Function call + ND_EXPR_STMT, // Expression statement + ND_STMT_EXPR, // Statement expression + ND_VAR, // Variable + ND_VLA_PTR, // VLA designator + ND_NUM, // Integer + ND_CAST, // Type cast + ND_MEMZERO, // Zero-clear a stack variable + ND_ASM, // "asm" + ND_CAS, // Atomic compare-and-swap + ND_EXCH, // Atomic exchange + ND_LOAD, // Atomic load + ND_STORE, // Atomic store + ND_TESTANDSET, // Sync lock test and set + ND_TESTANDSETA, // Atomic lock test and set + ND_CLEAR, // Atomic clear + ND_RELEASE, // Atomic lock release + ND_FETCHADD, // Atomic fetch and add + ND_SUBFETCH, // Atomic sub and fetch + ND_FPCLASSIFY, // floating point classify + ND_MOVNTDQ, // Intel MOVNTDQ + ND_PMOVMSKB, // Intel PMOVMSKB } NodeKind; struct Node { @@ -394,6 +397,7 @@ struct Node { // Assembly Asm *azm; // Atomic compare-and-swap + char memorder; Node *cas_addr; Node *cas_old; Node *cas_new; diff --git a/third_party/chibicc/codegen.c b/third_party/chibicc/codegen.c index 21727cf4b..da74d080e 100644 --- a/third_party/chibicc/codegen.c +++ b/third_party/chibicc/codegen.c @@ -1547,6 +1547,14 @@ void gen_expr(Node *node) { println("\txchg\t%s,(%%rdi)", reg_ax(node->ty->size)); return; } + case ND_TESTANDSETA: { + gen_expr(node->lhs); + push(); + println("\tmov\t$1,%%eax"); + pop("%rdi"); + println("\txchg\t%s,(%%rdi)", reg_ax(node->ty->size)); + return; + } case ND_LOAD: { gen_expr(node->rhs); push(); @@ -1556,6 +1564,28 @@ void gen_expr(Node *node) { println("\tmov\t%s,(%%rdi)", reg_ax(node->ty->size)); return; } + case ND_STORE: { + gen_expr(node->lhs); + push(); + gen_expr(node->rhs); + pop("%rdi"); + println("\tmov\t(%%rax),%s", reg_ax(node->ty->size)); + println("\tmov\t%s,(%%rdi)", reg_ax(node->ty->size)); + if (node->memorder) { + println("\tmfence"); + } + return; + } + case ND_CLEAR: { + gen_expr(node->lhs); + 
println("\tmov\t%%rax,%%rdi"); + println("\txor\t%%eax,%%eax"); + println("\tmov\t%s,(%%rdi)", reg_ax(node->ty->size)); + if (node->memorder) { + println("\tmfence"); + } + return; + } case ND_FETCHADD: { gen_expr(node->lhs); push(); diff --git a/third_party/chibicc/kw.gperf b/third_party/chibicc/kw.gperf index 0f1c7777b..220da7e6e 100644 --- a/third_party/chibicc/kw.gperf +++ b/third_party/chibicc/kw.gperf @@ -119,8 +119,11 @@ __builtin_types_compatible_p, KW___BUILTIN_TYPES_COMPATIBLE_P "->", KW_ARROW ".", KW_DOT __atomic_load, KW___ATOMIC_LOAD -__atomic_fetch_add, KW___ATOMIC_FETCH_ADD +__atomic_store, KW___ATOMIC_STORE +__atomic_clear, KW___ATOMIC_CLEAR __atomic_sub_fetch, KW___ATOMIC_SUB_FETCH +__atomic_fetch_add, KW___ATOMIC_FETCH_ADD +__atomic_test_and_set, KW___ATOMIC_TEST_AND_SET __sync_lock_test_and_set, KW___SYNC_LOCK_TEST_AND_SET __sync_lock_release, KW___SYNC_LOCK_RELEASE __builtin_ia32_movntdq, KW___BUILTIN_IA32_MOVNTDQ diff --git a/third_party/chibicc/kw.h b/third_party/chibicc/kw.h index 62d8194eb..79aa5f796 100644 --- a/third_party/chibicc/kw.h +++ b/third_party/chibicc/kw.h @@ -112,6 +112,9 @@ #define KW___BUILTIN_IA32_MOVNTDQ 128 #define KW___ATOMIC_FETCH_ADD 129 #define KW___ATOMIC_SUB_FETCH 130 +#define KW___ATOMIC_TEST_AND_SET 131 +#define KW___ATOMIC_CLEAR 132 +#define KW___ATOMIC_STORE 133 #if !(__ASSEMBLER__ + __LINKER__ + 0) COSMOPOLITAN_C_START_ diff --git a/third_party/chibicc/kw.inc b/third_party/chibicc/kw.inc index f570e2e53..9238b0dba 100644 --- a/third_party/chibicc/kw.inc +++ b/third_party/chibicc/kw.inc @@ -37,44 +37,44 @@ #line 10 "kw.gperf" struct thatispacked KwSlot { char *name; unsigned char code; }; -#define TOTAL_KEYWORDS 116 +#define TOTAL_KEYWORDS 119 #define MIN_WORD_LENGTH 1 #define MAX_WORD_LENGTH 28 #define MIN_HASH_VALUE 1 -#define MAX_HASH_VALUE 201 -/* maximum key range = 201, duplicates = 0 */ +#define MAX_HASH_VALUE 238 +/* maximum key range = 238, duplicates = 0 */ static inline unsigned int hash (register const 
char *str, register size_t len) { static const unsigned char asso_values[] = { - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 100, 202, 202, 202, 202, 65, 202, - 95, 90, 85, 15, 202, 0, 75, 202, 202, 202, - 0, 202, 202, 202, 202, 202, 10, 202, 202, 202, - 202, 202, 55, 202, 202, 202, 0, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 5, 202, 0, 50, 0, - 5, 15, 0, 40, 45, 115, 60, 5, 20, 15, - 90, 85, 0, 0, 55, 10, 0, 65, 5, 0, - 0, 10, 25, 70, 35, 30, 5, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202, 202, 202, 202, - 202, 202, 202, 202, 202, 202, 202 + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 115, 239, 239, 239, 239, 50, 239, + 110, 105, 100, 5, 239, 0, 95, 239, 239, 239, + 10, 239, 239, 239, 239, 239, 0, 239, 239, 239, + 239, 239, 45, 239, 239, 239, 0, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 5, 239, 0, 90, 5, + 55, 10, 0, 25, 75, 105, 15, 10, 20, 15, + 125, 60, 15, 10, 10, 10, 0, 70, 5, 5, + 10, 0, 45, 85, 10, 30, 15, 239, 239, 239, + 239, 239, 
239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, + 239, 239, 239, 239, 239, 239, 239 }; register unsigned int hval = len; @@ -116,19 +116,21 @@ LookupKw (register const char *str, register size_t len) {"-", KW_MINUS}, #line 116 "kw.gperf" {"--", KW_DECREMENT}, - {""}, {""}, -#line 29 "kw.gperf" - {"const", KW_CONST}, + {""}, {""}, {""}, #line 63 "kw.gperf" {"typeof", KW_TYPEOF}, #line 62 "kw.gperf" {"typedef", KW_TYPEDEF}, - {""}, {""}, {""}, -#line 114 "kw.gperf" - {"~", KW_TILDE}, -#line 68 "kw.gperf" - {"_Atomic", KW__ATOMIC}, {""}, {""}, +#line 29 "kw.gperf" + {"const", KW_CONST}, +#line 109 "kw.gperf" + {"+", KW_PLUS}, +#line 115 "kw.gperf" + {"++", KW_INCREMENT}, +#line 20 "kw.gperf" + {"for", KW_FOR}, + {""}, #line 78 "kw.gperf" {"__restrict", KW_RESTRICT}, #line 22 "kw.gperf" @@ -143,244 +145,261 @@ LookupKw (register const char *str, register size_t len) {"__VA_OPT__", KW___VA_OPT__}, #line 13 "kw.gperf" {"struct", KW_STRUCT}, -#line 60 "kw.gperf" - {"strpbrk", KW_STRPBRK}, - {""}, {""}, -#line 31 "kw.gperf" - {"short", KW_SHORT}, -#line 28 "kw.gperf" - {"double", KW_DOUBLE}, -#line 79 "kw.gperf" - {"__restrict__", KW_RESTRICT}, -#line 95 "kw.gperf" - {"__builtin_popcount", KW___BUILTIN_POPCOUNT}, -#line 17 "kw.gperf" - {"void", KW_VOID}, -#line 69 "kw.gperf" - {"_Bool", KW__BOOL}, -#line 109 "kw.gperf" - {"+", KW_PLUS}, -#line 115 "kw.gperf" - {"++", KW_INCREMENT}, -#line 88 "kw.gperf" - 
{"__builtin_ffs", KW___BUILTIN_FFS}, +#line 118 "kw.gperf" + {"||", KW_LOGOR}, + {""}, #line 19 "kw.gperf" {"else", KW_ELSE}, -#line 25 "kw.gperf" - {"while", KW_WHILE}, -#line 85 "kw.gperf" - {"__builtin_compare_and_swap", KW___BUILTIN_COMPARE_AND_SWAP}, -#line 82 "kw.gperf" - {"__builtin_add_overflow", KW___BUILTIN_ADD_OVERFLOW}, -#line 81 "kw.gperf" - {"__typeof", KW_TYPEOF}, +#line 31 "kw.gperf" + {"short", KW_SHORT}, +#line 61 "kw.gperf" + {"strstr", KW_STRSTR}, +#line 79 "kw.gperf" + {"__restrict__", KW_RESTRICT}, +#line 67 "kw.gperf" + {"_Alignof", KW__ALIGNOF}, +#line 18 "kw.gperf" + {"char", KW_CHAR}, +#line 48 "kw.gperf" + {"endif", KW_ENDIF}, +#line 114 "kw.gperf" + {"~", KW_TILDE}, +#line 68 "kw.gperf" + {"_Atomic", KW__ATOMIC}, +#line 88 "kw.gperf" + {"__builtin_ffs", KW___BUILTIN_FFS}, #line 55 "kw.gperf" {"line", KW_LINE}, -#line 86 "kw.gperf" - {"__builtin_constant_p", KW___BUILTIN_CONSTANT_P}, +#line 25 "kw.gperf" + {"while", KW_WHILE}, + {""}, +#line 60 "kw.gperf" + {"strpbrk", KW_STRPBRK}, +#line 66 "kw.gperf" + {"_Alignas", KW__ALIGNAS}, +#line 47 "kw.gperf" + {"elif", KW_ELIF}, +#line 49 "kw.gperf" + {"error", KW_ERROR}, #line 74 "kw.gperf" {"__alignof__", KW___ALIGNOF__}, -#line 101 "kw.gperf" - {"__builtin_strpbrk", KW___BUILTIN_STRPBRK}, - {""}, {""}, {""}, {""}, -#line 103 "kw.gperf" - {"__builtin_sub_overflow", KW___BUILTIN_SUB_OVERFLOW}, -#line 72 "kw.gperf" - {"_Thread_local", KW__THREAD_LOCAL}, -#line 96 "kw.gperf" - {"__builtin_popcountl", KW___BUILTIN_POPCOUNTL}, -#line 97 "kw.gperf" - {"__builtin_popcountll", KW___BUILTIN_POPCOUNTLL}, -#line 56 "kw.gperf" - {"pragma", KW_PRAGMA}, -#line 92 "kw.gperf" - {"__builtin_mul_overflow", KW___BUILTIN_MUL_OVERFLOW}, -#line 104 "kw.gperf" - {"__builtin_types_compatible_p", KW___BUILTIN_TYPES_COMPATIBLE_P}, +#line 82 "kw.gperf" + {"__builtin_add_overflow", KW___BUILTIN_ADD_OVERFLOW}, +#line 44 "kw.gperf" + {"register", KW_REGISTER}, {""}, -#line 30 "kw.gperf" - {"float", KW_FLOAT}, +#line 69 
"kw.gperf" + {"_Bool", KW__BOOL}, #line 87 "kw.gperf" {"__builtin_expect", KW___BUILTIN_EXPECT}, #line 119 "kw.gperf" {"->", KW_ARROW}, -#line 20 "kw.gperf" - {"for", KW_FOR}, -#line 47 "kw.gperf" - {"elif", KW_ELIF}, +#line 95 "kw.gperf" + {"__builtin_popcount", KW___BUILTIN_POPCOUNT}, + {""}, #line 91 "kw.gperf" {"__builtin_fpclassify", KW___BUILTIN_FPCLASSIFY}, -#line 108 "kw.gperf" - {"}", KW_RB}, -#line 42 "kw.gperf" - {"default", KW_DEFAULT}, - {""}, -#line 89 "kw.gperf" - {"__builtin_ffsl", KW___BUILTIN_FFSL}, -#line 90 "kw.gperf" - {"__builtin_ffsll", KW___BUILTIN_FFSLL}, - {""}, {""}, {""}, -#line 18 "kw.gperf" - {"char", KW_CHAR}, -#line 64 "kw.gperf" - {"undef", KW_UNDEF}, -#line 61 "kw.gperf" - {"strstr", KW_STRSTR}, -#line 118 "kw.gperf" - {"||", KW_LOGOR}, -#line 67 "kw.gperf" - {"_Alignof", KW__ALIGNOF}, -#line 124 "kw.gperf" - {"__sync_lock_test_and_set", KW___SYNC_LOCK_TEST_AND_SET}, -#line 49 "kw.gperf" - {"error", KW_ERROR}, -#line 58 "kw.gperf" - {"strchr", KW_STRCHR}, -#line 40 "kw.gperf" - {"defined", KW_DEFINED}, -#line 65 "kw.gperf" - {"volatile", KW_VOLATILE}, -#line 71 "kw.gperf" - {"_Static_assert", KW__STATIC_ASSERT}, -#line 48 "kw.gperf" - {"endif", KW_ENDIF}, -#line 16 "kw.gperf" - {"static", KW_STATIC}, - {""}, -#line 66 "kw.gperf" - {"_Alignas", KW__ALIGNAS}, -#line 125 "kw.gperf" - {"__sync_lock_release", KW___SYNC_LOCK_RELEASE}, - {""}, -#line 39 "kw.gperf" - {"define", KW_DEFINE}, - {""}, -#line 35 "kw.gperf" - {"continue", KW_CONTINUE}, -#line 43 "kw.gperf" - {"auto", KW_AUTO}, - {""}, #line 99 "kw.gperf" {"__builtin_strchr", KW___BUILTIN_STRCHR}, -#line 21 "kw.gperf" - {"do", KW_DO}, - {""}, {""}, {""}, {""}, {""}, -#line 70 "kw.gperf" - {"_Generic", KW__GENERIC}, +#line 103 "kw.gperf" + {"__builtin_sub_overflow", KW___BUILTIN_SUB_OVERFLOW}, +#line 72 "kw.gperf" + {"_Thread_local", KW__THREAD_LOCAL}, #line 98 "kw.gperf" {"__builtin_reg_class", KW___BUILTIN_REG_CLASS}, {""}, #line 102 "kw.gperf" {"__builtin_strstr", 
KW___BUILTIN_STRSTR}, -#line 84 "kw.gperf" - {"__atomic_exchange", KW___ATOMIC_EXCHANGE}, -#line 45 "kw.gperf" - {"__attribute__", KW___ATTRIBUTE__}, -#line 83 "kw.gperf" - {"__builtin_assume_aligned", KW___BUILTIN_ASSUME_ALIGNED}, +#line 92 "kw.gperf" + {"__builtin_mul_overflow", KW___BUILTIN_MUL_OVERFLOW}, +#line 81 "kw.gperf" + {"__typeof", KW_TYPEOF}, {""}, -#line 32 "kw.gperf" - {"signed", KW_SIGNED}, +#line 86 "kw.gperf" + {"__builtin_constant_p", KW___BUILTIN_CONSTANT_P}, +#line 108 "kw.gperf" + {"}", KW_RB}, +#line 101 "kw.gperf" + {"__builtin_strpbrk", KW___BUILTIN_STRPBRK}, +#line 104 "kw.gperf" + {"__builtin_types_compatible_p", KW___BUILTIN_TYPES_COMPATIBLE_P}, +#line 89 "kw.gperf" + {"__builtin_ffsl", KW___BUILTIN_FFSL}, +#line 90 "kw.gperf" + {"__builtin_ffsll", KW___BUILTIN_FFSLL}, + {""}, {""}, {""}, +#line 96 "kw.gperf" + {"__builtin_popcountl", KW___BUILTIN_POPCOUNTL}, +#line 97 "kw.gperf" + {"__builtin_popcountll", KW___BUILTIN_POPCOUNTLL}, +#line 85 "kw.gperf" + {"__builtin_compare_and_swap", KW___BUILTIN_COMPARE_AND_SWAP}, {""}, #line 77 "kw.gperf" {"__int128", KW___INT128}, -#line 24 "kw.gperf" - {"long", KW_LONG}, -#line 33 "kw.gperf" - {"break", KW_BREAK}, -#line 50 "kw.gperf" - {"extern", KW_EXTERN}, +#line 17 "kw.gperf" + {"void", KW_VOID}, +#line 64 "kw.gperf" + {"undef", KW_UNDEF}, +#line 28 "kw.gperf" + {"double", KW_DOUBLE}, + {""}, +#line 70 "kw.gperf" + {"_Generic", KW__GENERIC}, +#line 43 "kw.gperf" + {"auto", KW_AUTO}, + {""}, +#line 58 "kw.gperf" + {"strchr", KW_STRCHR}, {""}, #line 76 "kw.gperf" {"__inline", KW_INLINE}, -#line 46 "kw.gperf" - {"_Noreturn", KW__NORETURN}, {""}, {""}, -#line 12 "kw.gperf" - {"if", KW_IF}, -#line 54 "kw.gperf" - {"int", KW_INT}, +#line 39 "kw.gperf" + {"define", KW_DEFINE}, {""}, -#line 37 "kw.gperf" - {"ifdef", KW_IFDEF}, -#line 59 "kw.gperf" - {"strlen", KW_STRLEN}, +#line 57 "kw.gperf" + {"restrict", KW_RESTRICT}, + {""}, {""}, +#line 16 "kw.gperf" + {"static", KW_STATIC}, + {""}, +#line 35 
"kw.gperf" + {"continue", KW_CONTINUE}, +#line 127 "kw.gperf" + {"__sync_lock_test_and_set", KW___SYNC_LOCK_TEST_AND_SET}, +#line 30 "kw.gperf" + {"float", KW_FLOAT}, +#line 56 "kw.gperf" + {"pragma", KW_PRAGMA}, {""}, #line 94 "kw.gperf" {"__builtin_offsetof", KW___BUILTIN_OFFSETOF}, -#line 34 "kw.gperf" - {"enum", KW_ENUM}, - {""}, -#line 27 "kw.gperf" - {"switch", KW_SWITCH}, -#line 93 "kw.gperf" - {"__builtin_neg_overflow", KW___BUILTIN_NEG_OVERFLOW}, -#line 57 "kw.gperf" - {"restrict", KW_RESTRICT}, -#line 51 "kw.gperf" - {"goto", KW_GOTO}, +#line 128 "kw.gperf" + {"__sync_lock_release", KW___SYNC_LOCK_RELEASE}, {""}, #line 111 "kw.gperf" {"&", KW_AMP}, #line 117 "kw.gperf" {"&&", KW_LOGAND}, -#line 80 "kw.gperf" - {"__thread", KW__THREAD_LOCAL}, +#line 45 "kw.gperf" + {"__attribute__", KW___ATTRIBUTE__}, +#line 51 "kw.gperf" + {"goto", KW_GOTO}, {""}, {""}, +#line 12 "kw.gperf" + {"if", KW_IF}, +#line 54 "kw.gperf" + {"int", KW_INT}, +#line 122 "kw.gperf" + {"__atomic_store", KW___ATOMIC_STORE}, +#line 37 "kw.gperf" + {"ifdef", KW_IFDEF}, +#line 126 "kw.gperf" + {"__atomic_test_and_set", KW___ATOMIC_TEST_AND_SET}, +#line 84 "kw.gperf" + {"__atomic_exchange", KW___ATOMIC_EXCHANGE}, +#line 65 "kw.gperf" + {"volatile", KW_VOLATILE}, + {""}, {""}, {""}, +#line 21 "kw.gperf" + {"do", KW_DO}, + {""}, +#line 71 "kw.gperf" + {"_Static_assert", KW__STATIC_ASSERT}, + {""}, #line 38 "kw.gperf" {"ifndef", KW_IFNDEF}, - {""}, -#line 23 "kw.gperf" - {"unsigned", KW_UNSIGNED}, {""}, {""}, -#line 107 "kw.gperf" - {"{", KW_LB}, +#line 24 "kw.gperf" + {"long", KW_LONG}, + {""}, {""}, {""}, {""}, +#line 123 "kw.gperf" + {"__atomic_clear", KW___ATOMIC_CLEAR}, + {""}, +#line 32 "kw.gperf" + {"signed", KW_SIGNED}, +#line 40 "kw.gperf" + {"defined", KW_DEFINED}, + {""}, {""}, {""}, +#line 53 "kw.gperf" + {"inline", KW_INLINE}, +#line 36 "kw.gperf" + {"include", KW_INCLUDE}, + {""}, {""}, {""}, +#line 50 "kw.gperf" + {"extern", KW_EXTERN}, #line 52 "kw.gperf" {"include_next", 
KW_INCLUDE_NEXT}, {""}, {""}, {""}, -#line 100 "kw.gperf" - {"__builtin_strlen", KW___BUILTIN_STRLEN}, -#line 126 "kw.gperf" - {"__builtin_ia32_movntdq", KW___BUILTIN_IA32_MOVNTDQ}, - {""}, {""}, {""}, -#line 120 "kw.gperf" - {".", KW_DOT}, -#line 36 "kw.gperf" - {"include", KW_INCLUDE}, -#line 122 "kw.gperf" - {"__atomic_fetch_add", KW___ATOMIC_FETCH_ADD}, - {""}, {""}, #line 14 "kw.gperf" {"return", KW_RETURN}, - {""}, {""}, {""}, -#line 26 "kw.gperf" - {"union", KW_UNION}, -#line 127 "kw.gperf" + {""}, +#line 23 "kw.gperf" + {"unsigned", KW_UNSIGNED}, +#line 46 "kw.gperf" + {"_Noreturn", KW__NORETURN}, + {""}, +#line 130 "kw.gperf" {"__builtin_ia32_pmovmskb128", KW___BUILTIN_IA32_PMOVMSKB128}, +#line 42 "kw.gperf" + {"default", KW_DEFAULT}, + {""}, +#line 34 "kw.gperf" + {"enum", KW_ENUM}, + {""}, +#line 59 "kw.gperf" + {"strlen", KW_STRLEN}, +#line 129 "kw.gperf" + {"__builtin_ia32_movntdq", KW___BUILTIN_IA32_MOVNTDQ}, + {""}, +#line 83 "kw.gperf" + {"__builtin_assume_aligned", KW___BUILTIN_ASSUME_ALIGNED}, + {""}, +#line 27 "kw.gperf" + {"switch", KW_SWITCH}, + {""}, {""}, {""}, {""}, {""}, +#line 93 "kw.gperf" + {"__builtin_neg_overflow", KW___BUILTIN_NEG_OVERFLOW}, + {""}, {""}, {""}, +#line 107 "kw.gperf" + {"{", KW_LB}, + {""}, +#line 80 "kw.gperf" + {"__thread", KW__THREAD_LOCAL}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 100 "kw.gperf" + {"__builtin_strlen", KW___BUILTIN_STRLEN}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 120 "kw.gperf" + {".", KW_DOT}, + {""}, {""}, {""}, +#line 33 "kw.gperf" + {"break", KW_BREAK}, + {""}, {""}, {""}, {""}, {""}, #line 112 "kw.gperf" {"*", KW_STAR}, {""}, #line 121 "kw.gperf" {"__atomic_load", KW___ATOMIC_LOAD}, - {""}, {""}, {""}, {""}, -#line 44 "kw.gperf" - {"register", KW_REGISTER}, - {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, #line 106 "kw.gperf" {")", KW_RP}, - {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 26 "kw.gperf" + {"union", 
KW_UNION}, + {""}, {""}, {""}, {""}, {""}, #line 105 "kw.gperf" {"(", KW_LP}, - {""}, {""}, {""}, {""}, -#line 53 "kw.gperf" - {"inline", KW_INLINE}, - {""}, -#line 123 "kw.gperf" - {"__atomic_sub_fetch", KW___ATOMIC_SUB_FETCH}, - {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, #line 113 "kw.gperf" - {"!", KW_EXCLAIM} + {"!", KW_EXCLAIM}, + {""}, +#line 125 "kw.gperf" + {"__atomic_fetch_add", KW___ATOMIC_FETCH_ADD}, + {""}, {""}, {""}, {""}, +#line 124 "kw.gperf" + {"__atomic_sub_fetch", KW___ATOMIC_SUB_FETCH} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) diff --git a/third_party/chibicc/parse.c b/third_party/chibicc/parse.c index 7945505e8..284613ca3 100644 --- a/third_party/chibicc/parse.c +++ b/third_party/chibicc/parse.c @@ -3259,7 +3259,42 @@ static Node *primary(Token **rest, Token *tok) { tok = skip(tok, ','); node->rhs = assign(&tok, tok); tok = skip(tok, ','); - const_expr(&tok, tok); + node->memorder = const_expr(&tok, tok); + *rest = skip(tok, ')'); + return node; + } + if (kw == KW___ATOMIC_STORE) { + Node *node = new_node(ND_STORE, tok); + tok = skip(tok->next, '('); + node->lhs = assign(&tok, tok); + add_type(node->lhs); + node->ty = node->lhs->ty->base; + tok = skip(tok, ','); + node->rhs = assign(&tok, tok); + tok = skip(tok, ','); + node->memorder = const_expr(&tok, tok); + *rest = skip(tok, ')'); + return node; + } + if (kw == KW___ATOMIC_TEST_AND_SET) { + Node *node = new_node(ND_TESTANDSETA, tok); + tok = skip(tok->next, '('); + node->lhs = assign(&tok, tok); + add_type(node->lhs); + node->ty = node->lhs->ty->base; + tok = skip(tok, ','); + node->memorder = const_expr(&tok, tok); + *rest = skip(tok, ')'); + return node; + } + if (kw == KW___ATOMIC_CLEAR) { + Node *node = new_node(ND_CLEAR, tok); + tok = skip(tok->next, '('); + node->lhs = assign(&tok, tok); + add_type(node->lhs); + node->ty = node->lhs->ty->base; + tok = skip(tok, ','); + node->memorder = const_expr(&tok, tok); *rest = skip(tok, ')'); return node; 
} diff --git a/third_party/chibicc/test/spinlock_test.c b/third_party/chibicc/test/spinlock_test.c index 5e50af42d..07086abb0 100644 --- a/third_party/chibicc/test/spinlock_test.c +++ b/third_party/chibicc/test/spinlock_test.c @@ -15,12 +15,66 @@ #define SPUNLOCK(lock) __sync_lock_release(lock) +//////////////////////////////////////////////////////////////////////////////// + +#define SPINLOCK2(lock) \ + do { \ + for (;;) { \ + typeof(*(lock)) x; \ + __atomic_load(lock, &x, __ATOMIC_RELAXED); \ + if (!x && !__atomic_test_and_set(lock, __ATOMIC_SEQ_CST)) { \ + break; \ + } else { \ + __builtin_ia32_pause(); \ + } \ + } \ + } while (0) + +#define SPUNLOCK2(lock) __sync_lock_release(lock) + +//////////////////////////////////////////////////////////////////////////////// + _Alignas(64) char lock; main() { + int x, y; + ASSERT(0, lock); SPINLOCK(&lock); ASSERT(1, lock); SPUNLOCK(&lock); ASSERT(0, lock); + + ASSERT(0, lock); + SPINLOCK2(&lock); + ASSERT(1, lock); + SPUNLOCK2(&lock); + ASSERT(0, lock); + + x = 0; + y = 7; + ASSERT(0, x); + ASSERT(7, y); + __atomic_store(&x, &y, __ATOMIC_RELAXED); + ASSERT(7, x); + ASSERT(7, y); + + x = 0; + y = 7; + ASSERT(0, x); + ASSERT(7, y); + __atomic_store(&x, &y, __ATOMIC_SEQ_CST); + ASSERT(7, x); + ASSERT(7, y); + + x = 5; + y = __atomic_test_and_set(&x, __ATOMIC_SEQ_CST); + ASSERT(1, x); + ASSERT(5, y); + + x = 5; + __atomic_clear(&x, __ATOMIC_SEQ_CST); + ASSERT(0, x); + + // } diff --git a/third_party/dlmalloc/dlmalloc.greg.c b/third_party/dlmalloc/dlmalloc.greg.c index 6bc34ac80..5d34cc0e2 100644 --- a/third_party/dlmalloc/dlmalloc.greg.c +++ b/third_party/dlmalloc/dlmalloc.greg.c @@ -20,7 +20,7 @@ #include "third_party/dlmalloc/dlmalloc.h" // clang-format off -#define FOOTERS 0 +#define FOOTERS 1 #define MSPACES 0 #define HAVE_MMAP 1