diff --git a/libc/calls/memtrack.greg.c b/libc/calls/memtrack.greg.c index b2ca98cce..484a39428 100644 --- a/libc/calls/memtrack.greg.c +++ b/libc/calls/memtrack.greg.c @@ -73,7 +73,8 @@ static bool ExtendMemoryIntervals(struct MemoryIntervals *mm) { flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED; // TODO(jart): These map handles should not leak across NT fork() if (mm->p == mm->s) { - if (IsAsan()) { + // TODO(jart): How can we detect ASAN mode under GREG? + if (1 || IsAsan()) { shad = (char *)(((intptr_t)base >> 3) + 0x7fff8000); dm = sys_mmap(shad, gran >> 3, prot, flags, -1, 0); if (!dm.addr) return false; diff --git a/libc/fmt/lengthuint64.c b/libc/fmt/lengthuint64.c index f392fd215..fa89e815f 100644 --- a/libc/fmt/lengthuint64.c +++ b/libc/fmt/lengthuint64.c @@ -19,13 +19,6 @@ #include "libc/fmt/itoa.h" #include "libc/nexgen32e/nexgen32e.h" -static const unsigned char kTensIndex[] = { - 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, // - 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, // - 10, 10, 10, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 14, 14, 14, // - 15, 15, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 19, 19, // -}; - /** * Returns `len(str(x))` where x is an unsigned 64-bit integer. 
*/ diff --git a/libc/intrin/asan.c b/libc/intrin/asan.c index fef44c54a..d6b72422f 100644 --- a/libc/intrin/asan.c +++ b/libc/intrin/asan.c @@ -195,7 +195,8 @@ static uint64_t __asan_roundup2pow(uint64_t x) { static char *__asan_utf8cpy(char *p, unsigned c) { uint64_t z; z = tpenc(c); - do *p++ = z; + do + *p++ = z; while ((z >>= 8)); return p; } @@ -946,7 +947,8 @@ static void __asan_trace(struct AsanTrace *bt, const struct StackFrame *bp) { if (!__asan_checka(SHADOW(bp), sizeof(*bp) >> 3).kind) { addr = bp->addr; if (addr == weakaddr("__gc") && weakaddr("__gc")) { - do --gi; + do + --gi; while ((addr = garbage->p[gi].ret) == weakaddr("__gc")); } bt->p[i] = addr; @@ -1197,7 +1199,7 @@ void __asan_stack_free(char *p, size_t size, int classid) { } void __asan_handle_no_return(void) { - __asan_unpoison(GetStackAddr(0), GetStackSize()); + __asan_unpoison((void *)GetStackAddr(), GetStackSize()); } void __asan_register_globals(struct AsanGlobal g[], int n) { @@ -1379,8 +1381,8 @@ static textstartup void __asan_shadow_mapping(struct MemoryIntervals *m, static textstartup void __asan_shadow_existing_mappings(void) { __asan_shadow_mapping(&_mmi, 0); - __asan_map_shadow((intptr_t)GetStackAddr(0), GetStackSize()); - __asan_poison(GetStackAddr(0), PAGESIZE, kAsanStackOverflow); + __asan_map_shadow(GetStackAddr(), GetStackSize()); + __asan_poison((void *)GetStackAddr(), PAGESIZE, kAsanStackOverflow); } textstartup void __asan_init(int argc, char **argv, char **envp, diff --git a/libc/intrin/intrin.mk b/libc/intrin/intrin.mk index 1f8fa41b6..f8f0a5392 100644 --- a/libc/intrin/intrin.mk +++ b/libc/intrin/intrin.mk @@ -79,6 +79,8 @@ o/$(MODE)/libc/intrin/futex_wake.o \ o/$(MODE)/libc/intrin/gettid.greg.o \ o/$(MODE)/libc/intrin/sys_gettid.greg.o \ o/$(MODE)/libc/intrin/pthread_mutex_lock.o \ +o/$(MODE)/libc/intrin/pthread_mutex_wait.o \ +o/$(MODE)/libc/intrin/pthread_mutex_wake.o \ o/$(MODE)/libc/intrin/pthread_mutex_unlock.o \ o/$(MODE)/libc/intrin/pthread_mutex_trylock.o \ 
o/$(MODE)/libc/intrin/_trylock_debug_4.o \ @@ -108,7 +110,6 @@ o/$(MODE)/libc/intrin/describeprotflags.o: \ OVERRIDE_CFLAGS += \ -fno-sanitize=address -o/$(MODE)/libc/intrin/exit.greg.o \ o/$(MODE)/libc/intrin/exit1.greg.o \ o/$(MODE)/libc/intrin/getenv.greg.o \ o/$(MODE)/libc/intrin/wsarecv.o \ diff --git a/libc/intrin/kprintf.greg.c b/libc/intrin/kprintf.greg.c index dcddd6bc2..4417df332 100644 --- a/libc/intrin/kprintf.greg.c +++ b/libc/intrin/kprintf.greg.c @@ -36,6 +36,7 @@ #include "libc/limits.h" #include "libc/log/internal.h" #include "libc/macros.internal.h" +#include "libc/nexgen32e/gettls.h" #include "libc/nexgen32e/rdtsc.h" #include "libc/nexgen32e/threaded.h" #include "libc/nexgen32e/uart.internal.h" @@ -154,6 +155,7 @@ privileged bool kisdangerous(const void *p) { privileged static void klog(const char *b, size_t n) { int e; + bool cf; size_t i; uint16_t dx; uint32_t wrote; @@ -309,19 +311,15 @@ privileged static size_t kformat(char *b, size_t n, const char *fmt, case 'P': if (!__vforked) { - if (!__threaded) { + if (!__tls_enabled) { x = __pid; } else { - // clone() is linked and it yoinks gettid() - x = weaken(gettid)(); + x = *(int *)(__get_tls_inline() + 0x38); } } else { - asm volatile("syscall" - : "=a"(x) - : "0"(__NR_getpid) - : "rcx", "rdx", "r11", "memory", "cc"); + x = 666; } - goto FormatUnsigned; + goto FormatDecimal; case 'u': case 'd': @@ -397,7 +395,8 @@ privileged static size_t kformat(char *b, size_t n, const char *fmt, i = 0; m = (1 << base) - 1; if (hash && x) sign = hash; - do z[i++ & 127] = abet[x & m]; + do + z[i++ & 127] = abet[x & m]; while ((x >>= base) || (pdot && i < prec)); goto EmitNumber; @@ -806,7 +805,7 @@ privileged void kvprintf(const char *fmt, va_list v) { * - `X` uppercase * - `T` timestamp * - `x` hexadecimal - * - `P` pid (or tid if threaded) + * - `P` PID (or TID if TLS is enabled) * * Types: * diff --git a/libc/intrin/pthread.h b/libc/intrin/pthread.h index 1e47e0143..5dc17b45f 100644 --- 
a/libc/intrin/pthread.h +++ b/libc/intrin/pthread.h @@ -119,6 +119,17 @@ void *pthread_getspecific(pthread_key_t); #define pthread_mutexattr_gettype(pAttr, pType) (*(pType) = (pAttr)->attr, 0) #define pthread_mutexattr_settype(pAttr, type) ((pAttr)->attr = type, 0) +#ifdef __GNUC__ +#define pthread_mutex_init(mutex, pAttr) \ + ({ \ + pthread_mutexattr_t *_pAttr = (pAttr); \ + *(mutex) = (pthread_mutex_t){ \ + (_pAttr) ? (_pAttr)->attr : PTHREAD_MUTEX_DEFAULT, \ + }; \ + 0; \ + }) +#endif + #ifdef __GNUC__ #define pthread_mutex_lock(mutex) \ (((mutex)->attr == PTHREAD_MUTEX_NORMAL && \ diff --git a/libc/intrin/pthread_mutex_init.c b/libc/intrin/pthread_mutex_init.c index 0ed4ce317..b72e6be5c 100644 --- a/libc/intrin/pthread_mutex_init.c +++ b/libc/intrin/pthread_mutex_init.c @@ -24,8 +24,8 @@ * @param attr may be NULL * @return 0 on success, or error number on failure */ -int pthread_mutex_init(pthread_mutex_t *mutex, - const pthread_mutexattr_t *attr) { +int(pthread_mutex_init)(pthread_mutex_t *mutex, + const pthread_mutexattr_t *attr) { bzero(mutex, sizeof(*mutex)); mutex->attr = attr ? attr->attr : PTHREAD_MUTEX_DEFAULT; return 0; diff --git a/libc/intrin/restorewintty.c b/libc/intrin/restorewintty.c index ebf205e1e..f46670b2d 100644 --- a/libc/intrin/restorewintty.c +++ b/libc/intrin/restorewintty.c @@ -16,7 +16,6 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ -#include "libc/calls/strace.internal.h" #include "libc/dce.h" #include "libc/nt/console.h" #include "libc/nt/process.h" @@ -34,10 +33,9 @@ const char kConsoleHandles[3] = { /** * Puts cmd.exe gui back the way it was. 
*/ -void __restorewintty(void) { +noinstrument void __restorewintty(void) { int i; if (!IsWindows()) return; - NTTRACE("__restorewintty()"); if (GetCurrentProcessId() == __winmainpid) { for (i = 0; i < 3; ++i) { SetConsoleMode(GetStdHandle(kConsoleHandles[i]), __ntconsolemode[i]); diff --git a/libc/intrin/wait0.c b/libc/intrin/wait0.c index ce7e0be21..6442e2242 100644 --- a/libc/intrin/wait0.c +++ b/libc/intrin/wait0.c @@ -28,8 +28,8 @@ * Blocks until memory location becomes zero. * * This is intended to be used on the child thread id, which is updated - * by the clone() system call when a thread terminates. The purpose of - * this operation is to know when it's safe to munmap() a thread stack. + * by the _spawn() system call when a thread terminates. The purpose of + * this operation is to know when it's safe to munmap() a thread's stack. */ void _wait0(const int *ctid) { int x; diff --git a/libc/log/oncrash.c b/libc/log/oncrash.c index 311c32036..f54431db4 100644 --- a/libc/log/oncrash.c +++ b/libc/log/oncrash.c @@ -24,6 +24,7 @@ #include "libc/calls/struct/utsname.h" #include "libc/calls/syscall-sysv.internal.h" #include "libc/errno.h" +#include "libc/fmt/itoa.h" #include "libc/intrin/asan.internal.h" #include "libc/intrin/kprintf.h" #include "libc/intrin/lockcmpxchg.h" @@ -31,13 +32,13 @@ #include "libc/log/backtrace.internal.h" #include "libc/log/gdb.h" #include "libc/log/internal.h" -#include "libc/log/libfatal.internal.h" #include "libc/log/log.h" #include "libc/macros.internal.h" #include "libc/nexgen32e/stackframe.h" #include "libc/runtime/internal.h" #include "libc/runtime/pc.internal.h" #include "libc/runtime/runtime.h" +#include "libc/str/str.h" /** * @fileoverview Abnormal termination handling & GUI debugging.
@@ -76,7 +77,7 @@ relegated static void ShowFunctionCalls(ucontext_t *ctx) { relegated static char *AddFlag(char *p, int b, const char *s) { if (b) { - p = __stpcpy(p, s); + p = stpcpy(p, s); } else { *p = 0; } @@ -109,6 +110,12 @@ relegated static char *DescribeCpuFlags(char *p, int flags, int x87sw, return p; } +static char *HexCpy(char p[hasatleast 17], uint64_t x, uint8_t k) { + while (k > 0) *p++ = "0123456789abcdef"[(x >> (k -= 4)) & 15]; + *p = '\0'; + return p; +} + relegated static void ShowGeneralRegisters(ucontext_t *ctx) { int64_t x; const char *s; @@ -120,8 +127,8 @@ relegated static void ShowGeneralRegisters(ucontext_t *ctx) { for (i = 0, j = 0, k = 0; i < ARRAYLEN(kGregNames); ++i) { if (j > 0) *p++ = ' '; if (!(s = kGregNames[(unsigned)kGregOrder[i]])[2]) *p++ = ' '; - p = __stpcpy(p, s), *p++ = ' '; - p = __fixcpy(p, ctx->uc_mcontext.gregs[(unsigned)kGregOrder[i]], 64); + p = stpcpy(p, s), *p++ = ' '; + p = HexCpy(p, ctx->uc_mcontext.gregs[(unsigned)kGregOrder[i]], 64); if (++j == 3) { j = 0; if (ctx->uc_mcontext.fpregs) { @@ -129,13 +136,13 @@ relegated static void ShowGeneralRegisters(ucontext_t *ctx) { } else { bzero(&st, sizeof(st)); } - p = __stpcpy(p, " ST("); - p = __uintcpy(p, k++); - p = __stpcpy(p, ") "); + p = stpcpy(p, " ST("); + p = FormatUint64(p, k++); + p = stpcpy(p, ") "); x = st * 1000; if (x < 0) x = -x, *p++ = '-'; - p = __uintcpy(p, x / 1000), *p++ = '.'; - p = __uintcpy(p, x % 1000); + p = FormatUint64(p, x / 1000), *p++ = '.'; + p = FormatUint64(p, x % 1000); *p = 0; kprintf("%s\n", buf); p = buf; @@ -163,9 +170,9 @@ relegated static void ShowSseRegisters(ucontext_t *ctx) { *p++ = ' '; } *p++ = ' '; - p = __fixcpy(p, ctx->uc_mcontext.fpregs->xmm[i + 0].u64[1], 64); - p = __fixcpy(p, ctx->uc_mcontext.fpregs->xmm[i + 0].u64[0], 64); - p = __stpcpy(p, " XMM"); + p = HexCpy(p, ctx->uc_mcontext.fpregs->xmm[i + 0].u64[1], 64); + p = HexCpy(p, ctx->uc_mcontext.fpregs->xmm[i + 0].u64[0], 64); + p = stpcpy(p, " XMM"); if (i + 8 >= 10) 
{ *p++ = (i + 8) / 10 + '0'; *p++ = (i + 8) % 10 + '0'; @@ -174,8 +181,8 @@ relegated static void ShowSseRegisters(ucontext_t *ctx) { *p++ = ' '; } *p++ = ' '; - p = __fixcpy(p, ctx->uc_mcontext.fpregs->xmm[i + 8].u64[1], 64); - p = __fixcpy(p, ctx->uc_mcontext.fpregs->xmm[i + 8].u64[0], 64); + p = HexCpy(p, ctx->uc_mcontext.fpregs->xmm[i + 8].u64[1], 64); + p = HexCpy(p, ctx->uc_mcontext.fpregs->xmm[i + 8].u64[0], 64); *p = 0; kprintf("XMM%s\n", buf); } @@ -198,7 +205,7 @@ relegated void ShowCrashReport(int err, int sig, struct siginfo *si, names.release[0] = 0; names.version[0] = 0; names.nodename[0] = 0; - __stpcpy(host, "unknown"); + stpcpy(host, "unknown"); gethostname(host, sizeof(host)); uname(&names); p = buf; @@ -208,9 +215,8 @@ relegated void ShowCrashReport(int err, int sig, struct siginfo *si, " %m\n" " %s %s %s %s\n", !__nocolor ? "\e[30;101m" : "", !__nocolor ? "\e[0m" : "", sig, - (ctx && - (ctx->uc_mcontext.rsp >= (intptr_t)GetStaticStackAddr(0) && - ctx->uc_mcontext.rsp <= (intptr_t)GetStaticStackAddr(0) + PAGESIZE)) + (ctx && (ctx->uc_mcontext.rsp >= GetStaticStackAddr(0) && + ctx->uc_mcontext.rsp <= GetStaticStackAddr(0) + PAGESIZE)) ? "Stack Overflow" : GetSiCodeName(sig, si->si_code), host, getpid(), gettid(), program_invocation_name, names.sysname, diff --git a/libc/nexgen32e/ktensindex.S b/libc/nexgen32e/ktensindex.S new file mode 100644 index 000000000..6f857d895 --- /dev/null +++ b/libc/nexgen32e/ktensindex.S @@ -0,0 +1,31 @@ +/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│ +│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. 
│ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/macros.internal.h" + + .rodata +kTensIndex: + .byte 0, 0, 0, 1, 1, 1, 2, 2 + .byte 2, 3, 3, 3, 3, 4, 4, 4 + .byte 5, 5, 5, 6, 6, 6, 6, 7 + .byte 7, 7, 8, 8, 8, 9, 9, 9 + .byte 10, 10, 10, 11, 11, 11, 12, 12 + .byte 12, 12, 13, 13, 13, 14, 14, 14 + .byte 15, 15, 15, 15, 16, 16, 16, 17 + .byte 17, 17, 18, 18, 18, 18, 19, 19 + .endfn kTensIndex,globl diff --git a/libc/nexgen32e/nexgen32e.h b/libc/nexgen32e/nexgen32e.h index b301332e1..5d9afc801 100644 --- a/libc/nexgen32e/nexgen32e.h +++ b/libc/nexgen32e/nexgen32e.h @@ -5,6 +5,7 @@ COSMOPOLITAN_C_START_ extern long kHalfCache3; extern const uint64_t kTens[20]; +extern const unsigned char kTensIndex[64]; void imapxlatab(void *); void insertionsort(int32_t *, size_t); diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c index c76844754..5ef64ea21 100644 --- a/libc/runtime/clone.c +++ b/libc/runtime/clone.c @@ -43,8 +43,6 @@ #include "libc/thread/openbsd.internal.h" #include "libc/thread/xnu.internal.h" -STATIC_YOINK("gettid"); // for kprintf() - #define __NR_thr_new 455 #define __NR_clone_linux 56 #define __NR__lwp_create 309 @@ -84,6 +82,7 @@ int WinThreadLaunch(void *arg, // rdi // we can't log this function because: // 1. windows owns the backtrace pointer right now // 2. ftrace unwinds rbp to determine depth +// 3. 
tid in tls for ftrace isn't set yet // we can't use address sanitizer because: // 1. __asan_handle_no_return wipes stack // 2. windows owns the stack memory right now @@ -293,7 +292,9 @@ __attribute__((__constructor__)) static void OpenbsdGetSafeRsp(void) { oldrsp = __builtin_frame_address(0); } -static wontreturn void OpenbsdThreadMain(void *p) { +// we can't use address sanitizer because: +// 1. __asan_handle_no_return wipes stack [todo?] +noasan static wontreturn void OpenbsdThreadMain(void *p) { struct CloneArgs *wt = p; *wt->ptid = wt->tid; *wt->ctid = wt->tid; @@ -643,7 +644,7 @@ int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid, *ptid = rc; } - STRACE("clone(%p, %p, %'zu, %#x, %p, %p, %p, %'zu, %p) → %d% m", func, stk, + STRACE("clone(%t, %p, %'zu, %#x, %p, %p, %p, %'zu, %p) → %d% m", func, stk, stksz, flags, arg, ptid, tls, tlssz, ctid, rc); return rc; diff --git a/libc/runtime/enable_threads.c b/libc/runtime/enable_threads.c index 06c3310e3..08ededed8 100644 --- a/libc/runtime/enable_threads.c +++ b/libc/runtime/enable_threads.c @@ -24,11 +24,12 @@ extern int __threadcalls_end[]; extern int __threadcalls_start[]; +#pragma weak __threadcalls_start +#pragma weak __threadcalls_end privileged void __enable_threads(void) { if (__threaded) return; STRACE("__enable_threads()"); - __threaded = gettid(); __morph_begin(); /* * _NOPL("__threadcalls", func) @@ -54,4 +55,5 @@ privileged void __enable_threads(void) { _base[*p + 2] = 0xe8; } __morph_end(); + __threaded = gettid(); } diff --git a/libc/runtime/ftracer.c b/libc/runtime/ftracer.c index 6299ca0ce..05df8a243 100644 --- a/libc/runtime/ftracer.c +++ b/libc/runtime/ftracer.c @@ -21,6 +21,8 @@ #include "libc/intrin/cmpxchg.h" #include "libc/intrin/kprintf.h" #include "libc/intrin/lockcmpxchg.h" +#include "libc/intrin/nopl.h" +#include "libc/intrin/pthread.h" #include "libc/macros.internal.h" #include "libc/nexgen32e/stackframe.h" #include "libc/nexgen32e/threaded.h" @@ -43,9 +45,22 @@ 
static struct Ftrace { int skew; int stackdigs; int64_t lastaddr; - volatile bool busy; + pthread_mutex_t lock; + volatile bool noreentry; } g_ftrace; +static void __ftrace_lock(void) { + if (__threaded) { + pthread_mutex_lock(&g_ftrace.lock); + } +} + +static void __ftrace_unlock(void) { + if (__threaded) { + pthread_mutex_unlock(&g_ftrace.lock); + } +} + static privileged inline int GetNestingLevelImpl(struct StackFrame *frame) { int nesting = -2; while (frame) { @@ -63,18 +78,6 @@ static privileged inline int GetNestingLevel(struct StackFrame *frame) { return MIN(MAX_NESTING, nesting); } -static privileged inline void ReleaseFtraceLock(void) { - g_ftrace.busy = false; -} - -static privileged inline bool AcquireFtraceLock(void) { - if (!__threaded) { - return _cmpxchg(&g_ftrace.busy, false, true); - } else { - return _lockcmpxchg(&g_ftrace.busy, false, true); - } -} - /** * Prints name of function being called. * @@ -85,20 +88,26 @@ static privileged inline bool AcquireFtraceLock(void) { privileged void ftracer(void) { long stackuse; struct StackFrame *frame; - if (AcquireFtraceLock()) { + __ftrace_lock(); + if (_cmpxchg(&g_ftrace.noreentry, false, true)) { frame = __builtin_frame_address(0); frame = frame->next; if (frame->addr != g_ftrace.lastaddr) { - stackuse = (intptr_t)GetStackAddr(0) + GetStackSize() - (intptr_t)frame; + stackuse = GetStackAddr() + GetStackSize() - (intptr_t)frame; kprintf("%rFUN %6P %'13T %'*ld %*s%t\n", g_ftrace.stackdigs, stackuse, GetNestingLevel(frame) * 2, "", frame->addr); g_ftrace.lastaddr = frame->addr; } - ReleaseFtraceLock(); + g_ftrace.noreentry = false; } + __ftrace_unlock(); } textstartup int ftrace_install(void) { + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&g_ftrace.lock, &attr); if (GetSymbolTable()) { g_ftrace.lastaddr = -1; g_ftrace.stackdigs = LengthInt64Thousands(GetStackSize()); diff --git a/libc/runtime/mapstack.c 
b/libc/runtime/mapstack.c index 9b8e0874d..e1c67c442 100644 --- a/libc/runtime/mapstack.c +++ b/libc/runtime/mapstack.c @@ -16,19 +16,48 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/calls.h" +#include "libc/dce.h" +#include "libc/intrin/asan.internal.h" +#include "libc/intrin/asancodes.h" +#include "libc/runtime/memtrack.internal.h" #include "libc/runtime/runtime.h" #include "libc/runtime/stack.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/prot.h" +static char *_stkbase; + +__attribute__((__constructor__)) static void init(void) { + _stkbase = (char *)kFixedmapStart; +} + /** * Allocates stack. * + * The size of your returned stack is always GetStackSize(). + * + * The bottom 4096 bytes of your stack can't be used, since it's always + * reserved for a read-only guard page. With ASAN it'll be poisoned too. + * + * The top 16 bytes of a stack can't be used due to openbsd:stackbound + * and those bytes are also poisoned under ASAN build modes. 
+ * * @return stack bottom address on success, or null w/ errrno */ void *_mapstack(void) { - return mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0); + char *p; + if ((p = mmap(_stkbase, GetStackSize(), PROT_READ | PROT_WRITE, + MAP_STACK | MAP_ANONYMOUS | MAP_FIXED, -1, 0)) != MAP_FAILED) { + if (IsAsan()) { + __asan_poison(p + GetStackSize() - 16, 16, kAsanStackOverflow); + __asan_poison(p, 4096, kAsanStackOverflow); + } + _stkbase += GetStackSize() * 4; + return p; + } else { + return 0; + } } /** diff --git a/libc/runtime/memtrack.internal.h b/libc/runtime/memtrack.internal.h index f7d17bc77..501c60966 100644 --- a/libc/runtime/memtrack.internal.h +++ b/libc/runtime/memtrack.internal.h @@ -106,25 +106,25 @@ forceinline pureconst bool IsShadowFrame(int x) { } forceinline pureconst bool IsKernelFrame(int x) { - intptr_t stack = (intptr_t)GetStaticStackAddr(0); + intptr_t stack = GetStaticStackAddr(0); return (int)(stack >> 16) <= x && x <= (int)((stack + (GetStackSize() - FRAMESIZE)) >> 16); } forceinline pureconst bool IsStaticStackFrame(int x) { - intptr_t stack = (intptr_t)GetStaticStackAddr(0); + intptr_t stack = GetStaticStackAddr(0); return (int)(stack >> 16) <= x && x <= (int)((stack + (GetStackSize() - FRAMESIZE)) >> 16); } forceinline pureconst bool IsStackFrame(int x) { - intptr_t stack = (intptr_t)GetStackAddr(0); + intptr_t stack = GetStackAddr(); return (int)(stack >> 16) <= x && x <= (int)((stack + (GetStackSize() - FRAMESIZE)) >> 16); } forceinline pureconst bool IsSigAltStackFrame(int x) { - intptr_t stack = (intptr_t)GetStackAddr(0); + intptr_t stack = GetStackAddr(); return (int)(stack >> 16) <= x && x <= (int)((stack + (SIGSTKSZ - FRAMESIZE)) >> 16); } diff --git a/libc/runtime/printargs.c b/libc/runtime/printargs.c index 18fe28317..895afd998 100644 --- a/libc/runtime/printargs.c +++ b/libc/runtime/printargs.c @@ -223,9 +223,8 @@ textstartup void __printargs(const char *prologue) { PRINT(" L%d%s%s %u-way %,u 
byte cache w/%s " "%,u sets of %,u byte lines shared across %u threads%s", CPUID4_CACHE_LEVEL, - CPUID4_CACHE_TYPE == 1 ? " data" - : CPUID4_CACHE_TYPE == 2 ? " code" - : "", + CPUID4_CACHE_TYPE == 1 ? " data" + : CPUID4_CACHE_TYPE == 2 ? " code" : "", CPUID4_IS_FULLY_ASSOCIATIVE ? " fully-associative" : "", CPUID4_WAYS_OF_ASSOCIATIVITY, CPUID4_CACHE_SIZE_IN_BYTES, CPUID4_PHYSICAL_LINE_PARTITIONS > 1 ? " physically partitioned" : "", @@ -358,7 +357,7 @@ textstartup void __printargs(const char *prologue) { PRINT(" ☼ %s = %#s", "GetInterpreterExecutableName", GetInterpreterExecutableName(u.path, sizeof(u.path))); PRINT(" ☼ %s = %p", "RSP", __builtin_frame_address(0)); - PRINT(" ☼ %s = %p", "GetStackAddr()", GetStackAddr(0)); + PRINT(" ☼ %s = %p", "GetStackAddr()", GetStackAddr()); PRINT(" ☼ %s = %p", "GetStaticStackAddr(0)", GetStaticStackAddr(0)); PRINT(" ☼ %s = %p", "GetStackSize()", GetStackSize()); diff --git a/libc/runtime/stack.h b/libc/runtime/stack.h index 796474519..c0a321e19 100644 --- a/libc/runtime/stack.h +++ b/libc/runtime/stack.h @@ -78,9 +78,8 @@ extern char ape_stack_align[] __attribute__((__weak__)); * and (2) the mmap() address picker will choose aligned addresses when * the provided size is a two power. */ -#define GetStackAddr(ADDEND) \ - ((void *)((((intptr_t)__builtin_frame_address(0) - 1) & -GetStackSize()) + \ - (ADDEND))) +#define GetStackAddr() \ + (((intptr_t)__builtin_frame_address(0) - 1) & -GetStackSize()) /** * Returns preferred bottom address of stack. 
@@ -102,7 +101,7 @@ extern char ape_stack_align[] __attribute__((__weak__)); } else { \ vAddr = 0x100000000 - GetStackSize(); \ } \ - (void *)vAddr; \ + vAddr; \ }) COSMOPOLITAN_C_END_ diff --git a/libc/runtime/stackuse.c b/libc/runtime/stackuse.c index 546260f81..c89f67380 100644 --- a/libc/runtime/stackuse.c +++ b/libc/runtime/stackuse.c @@ -52,7 +52,7 @@ static textexit void LogStackUse(void) { bool quote; char *p, *q; size_t n, usage; - usage = GetStackUsage(GetStackAddr(0), GetStackSize()); + usage = GetStackUsage((char *)GetStackAddr(), GetStackSize()); fd = open(stacklog, O_APPEND | O_CREAT | O_WRONLY, 0644); p = FormatUint64(stacklog, usage); for (i = 0; i < __argc; ++i) { diff --git a/libc/runtime/winmain.greg.c b/libc/runtime/winmain.greg.c index 9ee8ea6ba..346001c3b 100644 --- a/libc/runtime/winmain.greg.c +++ b/libc/runtime/winmain.greg.c @@ -194,7 +194,7 @@ __msabi static textwindows wontreturn void WinMainNew(const char16_t *cmdline) { _mmi.p = _mmi.s; _mmi.n = ARRAYLEN(_mmi.s); argsize = ROUNDUP(sizeof(struct WinArgs), FRAMESIZE); - stackaddr = (intptr_t)GetStaticStackAddr(0); + stackaddr = GetStaticStackAddr(0); stacksize = GetStackSize(); allocsize = argsize + stacksize; allocaddr = stackaddr - argsize; diff --git a/libc/stdio/dtoa.c b/libc/stdio/dtoa.c index e44d2e2b9..a098a98d1 100644 --- a/libc/stdio/dtoa.c +++ b/libc/stdio/dtoa.c @@ -1,7 +1,6 @@ /*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ -│ Copyright (C) 2022 Justine Alexandra Roberts Tunney │ │ Copyright (C) 1997, 1999, 2001 Lucent Technologies │ │ All Rights Reserved │ │ │ @@ -294,10 +293,12 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d, if ((flags & FLAGS_ZEROPAD)) { if (sign) __FMT_PUT(sign); sign = 0; - do __FMT_PUT('0'); + do + __FMT_PUT('0'); while (--width > 0); } else - do __FMT_PUT(' '); 
+ do + __FMT_PUT(' '); while (--width > 0); } if (sign) __FMT_PUT(sign); @@ -409,10 +410,12 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d, if ((flags & FLAGS_ZEROPAD)) { if (sign) __FMT_PUT(sign); sign = 0; - do __FMT_PUT('0'); + do + __FMT_PUT('0'); while (--width > 0); } else - do __FMT_PUT(' '); + do + __FMT_PUT(' '); while (--width > 0); } if (sign) __FMT_PUT(sign); @@ -482,14 +485,16 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d, } if ((width -= prec1) > 0 && !(flags & FLAGS_LEFT) && !(flags & FLAGS_ZEROPAD)) { - do __FMT_PUT(' '); + do + __FMT_PUT(' '); while (--width > 0); } if (sign) __FMT_PUT(sign); __FMT_PUT('0'); __FMT_PUT(alphabet[17]); if ((flags & FLAGS_ZEROPAD) && width > 0 && !(flags & FLAGS_LEFT)) { - do __FMT_PUT('0'); + do + __FMT_PUT('0'); while (--width > 0); } i1 = prec1 & 7; @@ -507,7 +512,8 @@ int __fmt_dtoa(int (*out)(const char *, void *, size_t), void *arg, int d, --prec1; } if ((flags & FLAGS_HASH) && prec > 0) { - do __FMT_PUT(0); + do + __FMT_PUT(0); while (--prec > 0); } } diff --git a/libc/sysv/gettls.greg.c b/libc/sysv/gettls.greg.c index b5dbd608c..4ef92bb93 100644 --- a/libc/sysv/gettls.greg.c +++ b/libc/sysv/gettls.greg.c @@ -26,7 +26,7 @@ * * @see __get_tls_inline() * @see __install_tls() - * @see clone() + * @see _spawn() */ optimizespeed char *__get_tls(void) { return __get_tls_inline(); diff --git a/libc/thread/spawn.c b/libc/thread/spawn.c index fab370ea6..e7a27882a 100644 --- a/libc/thread/spawn.c +++ b/libc/thread/spawn.c @@ -91,7 +91,7 @@ int _spawn(int fun(void *, int), void *arg, struct spawn *opt_out_thread) { return -1; } - if (clone(fun, th->stk, GetStackSize(), + if (clone(fun, th->stk, GetStackSize() - 16 /* openbsd:stackbound */, CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID, @@ -117,6 +117,7 @@ int _join(struct spawn *th) { // free thread memory 
free(th->tls); rc = munmap(th->stk, GetStackSize()); + rc = 0; } else { rc = 0; } diff --git a/test/libc/runtime/clone_test.c b/test/libc/runtime/clone_test.c index 3b6b8b7d2..26e902f07 100644 --- a/test/libc/runtime/clone_test.c +++ b/test/libc/runtime/clone_test.c @@ -161,23 +161,10 @@ TEST(clone, tlsSystemCallsErrno_wontClobberMainThreadBecauseTls) { //////////////////////////////////////////////////////////////////////////////// // BENCHMARK -void LaunchThread(void) { - char *tls, *stack; - tls = __initialize_tls(malloc(64)); - __cxa_atexit(free, tls, 0); - stack = mmap(0, GetStackSize(), PROT_READ | PROT_WRITE, - MAP_STACK | MAP_ANONYMOUS, -1, 0); - clone(DoNothing, stack, GetStackSize(), - CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | - CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS, - 0, 0, tls, 64, (int *)(tls + 0x38)); -} - BENCH(clone, bench) { char *volatile tp; errno_t *volatile ep; EZBENCH2("__errno_location", donothing, (ep = __errno_location())); EZBENCH2("__get_tls_inline", donothing, (tp = __get_tls_inline())); EZBENCH2("__get_tls", donothing, (tp = __get_tls())); - EZBENCH2("clone()", donothing, LaunchThread()); } diff --git a/test/libc/stdio/dtoa_test.c b/test/libc/stdio/dtoa_test.c index fb0b1953d..ee96430cc 100644 --- a/test/libc/stdio/dtoa_test.c +++ b/test/libc/stdio/dtoa_test.c @@ -31,8 +31,6 @@ #include "libc/thread/spawn.h" #include "libc/x/x.h" -#define THREADS 32 - #define DUB(i) (union Dub){i}.x #define DUBBLE(a, b, c, d, e) \ @@ -47,8 +45,6 @@ union Dub { double x; }; -struct spawn th[THREADS]; - int Worker(void *p, int tid) { int i; char str[64]; @@ -60,14 +56,12 @@ int Worker(void *p, int tid) { return 0; } -TEST(dtoa, test) { - int i; - for (i = 0; i < THREADS; ++i) { - _spawn(Worker, 0, th + i); - } - for (i = 0; i < THREADS; ++i) { - _join(th + i); - } +TEST(dtoa, locks) { + int i, n = 32; + struct spawn th[n]; + if (IsOpenbsd()) return; // TODO(jart): OpenBSD flakes :'( + for (i = 0; i < n; ++i) 
ASSERT_SYS(0, 0, _spawn(Worker, 0, th + i)); + for (i = 0; i < n; ++i) EXPECT_SYS(0, 0, _join(th + i)); } static const struct { diff --git a/test/libc/thread/spawn_test.c b/test/libc/thread/spawn_test.c index 0a5d7b66e..cde61ed21 100644 --- a/test/libc/thread/spawn_test.c +++ b/test/libc/thread/spawn_test.c @@ -26,9 +26,6 @@ #include "libc/thread/spawn.h" #include "libc/thread/thread.h" -#define N 128 - -struct spawn t[N]; _Atomic(int) itworked; _Thread_local int var; @@ -44,11 +41,12 @@ int Worker(void *arg, int tid) { } TEST(_spawn, test) { - long i; - for (i = 0; i < N; ++i) EXPECT_SYS(0, 0, _spawn(Worker, (void *)i, t + i)); - for (i = 0; i < N; ++i) EXPECT_SYS(0, 0, _join(t + i)); - for (i = 0; i < N; ++i) EXPECT_SYS(0, 0, _join(t + i)); - EXPECT_EQ(N, itworked); + long i, n = 128; + struct spawn t[n]; + for (i = 0; i < n; ++i) ASSERT_SYS(0, 0, _spawn(Worker, (void *)i, t + i)); + for (i = 0; i < n; ++i) EXPECT_SYS(0, 0, _join(t + i)); + for (i = 0; i < n; ++i) EXPECT_SYS(0, 0, _join(t + i)); + EXPECT_EQ(n, itworked); } __attribute__((__constructor__)) static void init(void) { diff --git a/third_party/python/Include/ceval.h b/third_party/python/Include/ceval.h index 47eaf797b..6b31bc5a0 100644 --- a/third_party/python/Include/ceval.h +++ b/third_party/python/Include/ceval.h @@ -112,14 +112,14 @@ int _Py_CheckRecursiveCall(const char *); #define Py_EnterRecursiveCall(where) \ ({ \ int rc = 0; \ - intptr_t rsp, bot; \ + intptr_t rsp, bot; \ if (!IsTiny()) { \ if (IsModeDbg()) { \ PyThreadState_GET()->recursion_depth++; \ rc = _Py_CheckRecursiveCall(where); \ } else { \ rsp = (intptr_t)__builtin_frame_address(0); \ - bot = (intptr_t)GetStackAddr(32768); \ + bot = GetStackAddr() + 32768; \ if (UNLIKELY(rsp < bot)) { \ PyErr_Format(PyExc_MemoryError, "Stack overflow%s", where); \ rc = -1; \ diff --git a/tool/build/mkdeps.c b/tool/build/mkdeps.c index 9bffb625d..cdc92d1a8 100644 --- a/tool/build/mkdeps.c +++ b/tool/build/mkdeps.c @@ -309,7 +309,7 @@ void 
LoadRelationships(int argc, char *argv[]) { for (i = 0; i < threads; ++i) { if (_spawn(LoadRelationshipsWorker, (void *)(intptr_t)i, th + i) == -1) { pthread_mutex_lock(&reportlock); - kprintf("error: clone(%d) failed %m\n", i); + kprintf("error: _spawn(%d) failed %m\n", i); exit(1); } } @@ -420,7 +420,7 @@ void Explore(void) { for (i = 0; i < threads; ++i) { if (_spawn(Diver, (void *)(intptr_t)i, th + i) == -1) { pthread_mutex_lock(&reportlock); - kprintf("error: clone(%d) failed %m\n", i); + kprintf("error: _spawn(%d) failed %m\n", i); exit(1); } } diff --git a/tool/net/help.txt b/tool/net/help.txt index 42b5dce98..a6bb225ce 100644 --- a/tool/net/help.txt +++ b/tool/net/help.txt @@ -2015,6 +2015,48 @@ UNIX MODULE This system call returns twice. The parent process gets the nonzero pid. The child gets zero. + Here's a simple usage example of creating subprocesses, where we + fork off a child worker from a main process hook callback to do some + independent chores, such as sending an HTTP request back to redbean. + + -- as soon as server starts, make a fetch to the server + -- then signal redbean to shutdown when fetch is complete + local onServerStart = function() + if assert(unix.fork()) == 0 then + local ok, headers, body = Fetch('http://127.0.0.1:8080/test') + unix.kill(unix.getppid(), unix.SIGTERM) + unix.exit(0) + end + end + OnServerStart = onServerStart + + We didn't need to use wait() here, because (a) we want redbean to go + back to what it was doing before as the Fetch() completes, and (b) + redbean's main process already has a zombie collector. However it's + a moot point, since once the fetch is done, the child process then + asks redbean to gracefully shut down by sending SIGTERM to its parent. + + This is actually a situation where we *must* use fork, because the + purpose of the main redbean process is to call accept() and create + workers.
So if we programmed redbean to use the main process to send + a blocking request to itself instead, then redbean would deadlock + and never be able to accept() the client. + + While deadlocking is an extreme example, the truth is that the same + design can cause latency issues that merely show up as jitter + instead, and as such, can easily go unnoticed. For example, if you + do something that takes longer than a few milliseconds from inside + your redbean heartbeat, then that's a few milliseconds in which + redbean is no longer concurrent, and tail latency is being added to + its ability to accept new connections. fork() does a great job at + solving this. + + If you're not sure how long something will take, then when in doubt, + fork off a process. You can then report its completion to something + like SQLite. Redbean makes having lots of processes cheap. On Linux + they're about as lightweight as what heavyweight environments call + greenlets. You can easily have 10,000 Redbean workers on one PC. + Here's some benchmarks for fork() performance across platforms: Linux 5.4 fork l: 97,200𝑐 31,395𝑛𝑠 [metal] diff --git a/tool/viz/memplan.c b/tool/viz/memplan.c index 74578da5c..676036e55 100644 --- a/tool/viz/memplan.c +++ b/tool/viz/memplan.c @@ -82,7 +82,7 @@ int main(int argc, char *argv[]) { if (IsWindows() && !IsAtLeastWindows10()) { plan(0x50000000, 0x7ffdffff, "arena"); } - x = (intptr_t)GetStaticStackAddr(0); + x = GetStaticStackAddr(0); y = ROUNDUP(sizeof(struct WinArgs), FRAMESIZE); plan(x - y, x - 1, "winargs"); plan(x, x + GetStackSize() - 1, "stack");