diff --git a/libc/dlopen/dlopen.c b/libc/dlopen/dlopen.c
index 9be0c1f56..f1d75fcb5 100644
--- a/libc/dlopen/dlopen.c
+++ b/libc/dlopen/dlopen.c
@@ -45,6 +45,7 @@
 #include "libc/nt/memory.h"
 #include "libc/nt/runtime.h"
 #include "libc/proc/posix_spawn.h"
+#include "libc/runtime/internal.h"
 #include "libc/runtime/runtime.h"
 #include "libc/runtime/syslib.internal.h"
 #include "libc/serialize.h"
@@ -494,6 +495,8 @@ static uint8_t *movimm(uint8_t p[static 16], int reg, uint64_t val) {
 static void *foreign_thunk_sysv(void *func) {
   uint8_t *code, *p;
 #ifdef __x86_64__
+  // the thunk is no longer needed, so func is handed back unwrapped
+  if (1) return func;
   // movabs $func,%rax
   // movabs $foreign_tramp,%r10
   // jmp *%r10
@@ -896,3 +899,17 @@ char *cosmo_dlerror(void) {
   STRACE("dlerror() → %#s", res);
   return res;
 }
+
+#ifdef __x86_64__
+static textstartup void dlopen_init() {
+  if (IsLinux() || IsFreebsd()) {
+    // switch from %fs to %gs for tls, carrying the current tib across
+    struct CosmoTib *tib = __get_tls();
+    __morph_tls();
+    __set_tls(tib);
+  }
+}
+const void *const dlopen_ctor[] initarray = {
+    dlopen_init,
+};
+#endif
diff --git a/libc/runtime/set_tls-sysv.S b/libc/intrin/sys_set_tls.S
similarity index 100%
rename from libc/runtime/set_tls-sysv.S
rename to libc/intrin/sys_set_tls.S
diff --git a/libc/intrin/tlsmorphed.c b/libc/intrin/tlsmorphed.c
new file mode 100644
index 000000000..3a805571e
--- /dev/null
+++ b/libc/intrin/tlsmorphed.c
@@ -0,0 +1,21 @@
+/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
+│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
+╞══════════════════════════════════════════════════════════════════════════════╡
+│ Copyright 2023 Justine Alexandra Roberts Tunney                              │
+│                                                                              │
+│ Permission to use, copy, modify, and/or distribute this software for         │
+│ any purpose with or without fee is hereby granted, provided that the         │
+│ above copyright notice and this permission notice appear in all copies.      │
+│                                                                              │
+│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
+│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
+│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
+│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
+│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
+│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
+│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
+│ PERFORMANCE OF THIS SOFTWARE.                                                │
+╚─────────────────────────────────────────────────────────────────────────────*/
+#include "libc/runtime/internal.h"
+
+char __tls_morphed;  // set to 1 by __morph_tls() once code has been rewritten
diff --git a/libc/runtime/clone.c b/libc/runtime/clone.c
index 72c76a94f..85d3db9df 100644
--- a/libc/runtime/clone.c
+++ b/libc/runtime/clone.c
@@ -48,6 +48,7 @@
 #include "libc/sock/internal.h"
 #include "libc/stdalign.internal.h"
 #include "libc/str/str.h"
+#include "libc/sysv/consts/arch.h"
 #include "libc/sysv/consts/clone.h"
 #include "libc/sysv/consts/futex.h"
 #include "libc/sysv/consts/nr.h"
@@ -63,6 +64,9 @@
 #define kMaxThreadIds 32768
 #define kMinThreadId 262144
 
+#define AMD64_SET_FSBASE 129
+#define AMD64_SET_GSBASE 131
+
 #define __NR_thr_new 455
 #define __NR_clone_linux 56
 #define __NR__lwp_create 309
@@ -90,6 +94,7 @@ struct CloneArgs {
   void *arg;
 };
 
+int sys_set_tls();
 int __stack_call(void *, int, long, long, int (*)(void *, int), void *);
 
 static struct CloneArgs *AllocateCloneArgs(char *stk, size_t stksz) {
@@ -390,14 +395,14 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
 ////////////////////////////////////////////////////////////////////////////////
 // FREE BESIYATA DISHMAYA
 
-void bone(struct CloneArgs *wt) {
-  *wt->ztid = 0;
-}
-
 static wontreturn void FreebsdThreadMain(void *p) {
   struct CloneArgs *wt = p;
 #ifdef __aarch64__
   asm volatile("mov\tx28,%0" : /* no outputs */ : "r"(wt->tls));
+#elif defined(__x86_64__)
+  if (__tls_morphed) {
+    sys_set_tls(AMD64_SET_GSBASE, wt->tls);
+  }
 #endif
   *wt->ctid = wt->tid;
   wt->func(wt->arg, wt->tid);
@@ -534,6 +539,13 @@ static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz,
 ////////////////////////////////////////////////////////////////////////////////
 // GNU/SYSTEMD
 
+struct LinuxCloneArgs {
+  int (*func)(void *, int);  // caller's thread function
+  void *arg;                 // caller's argument for func
+  char *tls;                 // installed with ARCH_SET_GS by LinuxThreadEntry
+  int ctid;                  // ctid target when CLONE_CHILD_SETTID wasn't set
+};
+
 int sys_clone_linux(int flags, // rdi
                     long sp, // rsi
                     int *ptid, // rdx
@@ -542,24 +554,40 @@ int sys_clone_linux(int flags, // rdi
                     void *func, // r9
                     void *arg); // 8(rsp)
 
+static int LinuxThreadEntry(void *arg, int tid) {
+  struct LinuxCloneArgs *wt = arg;
+  sys_set_tls(ARCH_SET_GS, wt->tls);
+  return wt->func(wt->arg, tid);
+}
+
 static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz,
                       int flags, void *arg, void *tls, int *ptid, int *ctid) {
   int rc;
   long sp;
+  struct LinuxCloneArgs *wt;
   sp = (intptr_t)(stk + stksz);
-  if (~flags & CLONE_CHILD_SETTID) {
-    flags |= CLONE_CHILD_SETTID;
-    sp -= sizeof(int);
-    sp = sp & -alignof(int);
-    ctid = (int *)sp;
-    sp -= 8; // experiment
-  }
+  sp -= sizeof(struct LinuxCloneArgs);  // reserve room for wt at the stack top
   // align the stack
 #ifdef __aarch64__
   sp = sp & -128; // for kernel 4.6 and earlier
 #else
   sp = sp & -16;
 #endif
+  wt = (struct LinuxCloneArgs *)sp;
+#ifdef __x86_64__
+  if ((flags & CLONE_SETTLS) && __tls_morphed) {
+    flags &= ~CLONE_SETTLS;  // child installs tls itself in LinuxThreadEntry
+    wt->arg = arg;
+    wt->tls = tls;
+    wt->func = func;
+    func = LinuxThreadEntry;
+    arg = wt;
+  }
+#endif
+  if (~flags & CLONE_CHILD_SETTID) {
+    flags |= CLONE_CHILD_SETTID;
+    ctid = &wt->ctid;
+  }
   if ((rc = sys_clone_linux(flags, sp, ptid, ctid, tls, func, arg)) >= 0) {
     // clone() is documented as setting ptid before return
     return 0;
diff --git a/libc/runtime/morph_tls.c b/libc/runtime/morph_tls.c
index 442b746e5..b780533ed 100644
--- a/libc/runtime/morph_tls.c
+++ b/libc/runtime/morph_tls.c
@@ -17,9 +17,9 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "ape/sections.internal.h"
-#include "libc/serialize.h"
 #include "libc/runtime/internal.h"
 #include "libc/runtime/runtime.h"
+#include "libc/serialize.h"
 #include "libc/thread/tls.h"
 
 typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
@@ -55,11 +55,13 @@ privileged void __morph_tls(void) {
     // address 0x30 was promised to us, according to Go team
     // https://github.com/golang/go/issues/23617
     dis = 0x30;
-  } else {
+  } else if (IsWindows()) {
     // MSVC __declspec(thread) generates binary code for this
     // %gs:0x1480 abi. So long as TlsAlloc() isn't called >64
     // times we should be good.
     dis = 0x1480 + __tls_index * 8;
+  } else {
+    dis = 0;
   }
 
   // iterate over modifiable code looking for 9 byte instruction
@@ -112,6 +114,7 @@ privileged void __morph_tls(void) {
     }
   }
 
+  __tls_morphed = 1;  // read by __set_tls() and the clone() thread paths
   __morph_end();
 #endif
 }
diff --git a/libc/runtime/set_tls.c b/libc/runtime/set_tls.c
index 3e87f0764..0dd8049c5 100644
--- a/libc/runtime/set_tls.c
+++ b/libc/runtime/set_tls.c
@@ -26,6 +26,9 @@
 #include "libc/thread/tls.h"
 #include "libc/thread/tls2.internal.h"
 
+#define AMD64_SET_FSBASE 129
+#define AMD64_SET_GSBASE 131
+
 int sys_set_tls();
 
 // we can't allow --ftrace here because cosmo_dlopen() calls this
@@ -37,9 +40,9 @@ dontinstrument textstartup void __set_tls(struct CosmoTib *tib) {
   if (IsWindows()) {
     asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib));
   } else if (IsFreebsd()) {
-    sys_set_tls(129 /*AMD64_SET_FSBASE*/, tib);
+    sys_set_tls(__tls_morphed ? AMD64_SET_GSBASE : AMD64_SET_FSBASE, tib);
   } else if (IsLinux()) {
-    sys_set_tls(ARCH_SET_FS, tib);
+    sys_set_tls(__tls_morphed ? ARCH_SET_GS : ARCH_SET_FS, tib);
   } else if (IsNetbsd()) {
     // netbsd has sysarch(X86_SET_FSBASE) but we can't use that because
     // signal handlers will cause it to be reset due to not setting the
diff --git a/libc/thread/tls.h b/libc/thread/tls.h
index 45f1a97ff..a713b050c 100644
--- a/libc/thread/tls.h
+++ b/libc/thread/tls.h
@@ -42,6 +42,7 @@ struct CosmoTib {
 };
 
 extern int __threaded;
+extern char __tls_morphed;
 extern unsigned __tls_index;
 
 char *_mktls(struct CosmoTib **);
diff --git a/libc/thread/tls2.internal.h b/libc/thread/tls2.internal.h
index 1bbe95c5e..b96693276 100644
--- a/libc/thread/tls2.internal.h
+++ b/libc/thread/tls2.internal.h
@@ -14,7 +14,11 @@ COSMOPOLITAN_C_START_
 __funline struct CosmoTib *__get_tls_privileged(void) {
   char *tib, *lin = (char *)0x30;
   if (IsLinux() || IsFreebsd() || IsNetbsd() || IsOpenbsd() || IsMetal()) {
-    asm("mov\t%%fs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory");
+    if (!__tls_morphed) {
+      asm("mov\t%%fs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory");
+    } else {
+      asm("mov\t%%gs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory");
+    }
   } else {
     asm("mov\t%%gs:(%1),%0" : "=a"(tib) : "r"(lin) : "memory");
     if (IsWindows()) {