mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-02 17:28:30 +00:00
Make more threading improvements
- ASAN memory morgue is now lockless
- Make C11 atomics header more portable
- Rewrote pthread keys support to be lockless
- Simplify Python's unicode table unpacking code
- Make crash report write(2) closer to being atomic
- Make it possible to strace/ftrace a single thread
- ASAN now checks nul-terminated strings fast and properly
- Windows fork() now restores TLS memory of calling thread
This commit is contained in:
parent
d7b88734cd
commit
e522aa3a07
189 changed files with 1363 additions and 1217 deletions
|
@ -17,7 +17,6 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
#include "libc/calls/syscall-sysv.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
|
@ -25,6 +24,7 @@
|
|||
#include "libc/intrin/asmflag.h"
|
||||
#include "libc/intrin/bits.h"
|
||||
#include "libc/intrin/describeflags.internal.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
#include "libc/nexgen32e/msr.h"
|
||||
#include "libc/nexgen32e/x86feature.h"
|
||||
#include "libc/runtime/pc.internal.h"
|
||||
|
@ -47,7 +47,7 @@
|
|||
"d"((uint32_t)(val_ >> 32))); \
|
||||
} while (0)
|
||||
|
||||
int sys_enable_tls();
|
||||
int sys_set_tls();
|
||||
|
||||
static int arch_prctl_msr(int code, int64_t addr) {
|
||||
switch (code) {
|
||||
|
@ -96,7 +96,7 @@ static int arch_prctl_netbsd(int code, int64_t addr) {
|
|||
// we use _lwp_setprivate() instead of sysarch(X86_SET_FSBASE)
|
||||
// because the latter has a bug where signal handlers cause it
|
||||
// to be clobbered. please note, this doesn't apply to %gs :-)
|
||||
return sys_enable_tls(addr);
|
||||
return sys_set_tls(addr);
|
||||
case ARCH_GET_GS:
|
||||
// sysarch(X86_GET_GSBASE)
|
||||
return sys_arch_prctl(14, addr);
|
||||
|
@ -114,7 +114,7 @@ static int arch_prctl_xnu(int code, int64_t addr) {
|
|||
case ARCH_SET_GS:
|
||||
// thread_fast_set_cthread_self has a weird ABI
|
||||
e = errno;
|
||||
sys_enable_tls(addr);
|
||||
sys_set_tls(addr);
|
||||
errno = e;
|
||||
return 0;
|
||||
case ARCH_GET_FS:
|
||||
|
|
|
@ -20,11 +20,11 @@
|
|||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/syscall-sysv.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/directmap.internal.h"
|
||||
#include "libc/intrin/nopl.internal.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/runtime/brk.internal.h"
|
||||
#include "libc/runtime/directmap.internal.h"
|
||||
#include "libc/runtime/memtrack.internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
|
|
|
@ -1,24 +0,0 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_RUNTIME_DIRECTMAP_H_
|
||||
#define COSMOPOLITAN_LIBC_RUNTIME_DIRECTMAP_H_
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
// Pair of 32-bit flag words.
// NOTE(review): the name suggests Windows NT memory-protection flags, but
// no use of this struct is visible in this header — confirm against callers.
struct ProtectNt {
|
||||
uint32_t flags1;
|
||||
uint32_t flags2;
|
||||
};
|
||||
|
||||
// Pointer/handle pair returned by value from sys_mmap(), sys_mmap_nt() and
// sys_mmap_metal(), which are declared later in this header.
struct DirectMap {
|
||||
void *addr;  // presumably the address of the mapping — confirm in sys_mmap()
|
||||
int64_t maphandle;  // presumably only meaningful on Windows — TODO confirm
|
||||
};
|
||||
|
||||
struct DirectMap sys_mmap(void *, size_t, int, int, int, int64_t);
|
||||
struct DirectMap sys_mmap_nt(void *, size_t, int, int, int, int64_t);
|
||||
struct DirectMap sys_mmap_metal(void *, size_t, int, int, int, int64_t);
|
||||
int sys_munmap_metal(void *, size_t);
|
||||
uint32_t __prot2nt(int, bool);
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* COSMOPOLITAN_LIBC_RUNTIME_DIRECTMAP_H_ */
|
|
@ -16,52 +16,24 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "ape/sections.internal.h"
|
||||
#include "libc/assert.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/struct/sigset.h"
|
||||
#include "libc/calls/syscall-sysv.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/intrin/asancodes.h"
|
||||
#include "libc/intrin/atomic.h"
|
||||
#include "libc/intrin/bits.h"
|
||||
#include "libc/intrin/weaken.h"
|
||||
#include "libc/log/libfatal.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/msr.h"
|
||||
#include "libc/nt/thread.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/morph.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/stdalign.internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/sysv/consts/nrlinux.h"
|
||||
#include "libc/thread/posixthread.internal.h"
|
||||
#include "libc/thread/tls.h"
|
||||
#include "third_party/xed/x86.h"
|
||||
|
||||
#define __NR_sysarch 0x000000a5 // freebsd+netbsd
|
||||
#define AMD64_SET_GSBASE 131 // freebsd
|
||||
#define AMD64_SET_FSBASE 129 // freebsd
|
||||
#define X86_SET_GSBASE 16 // netbsd
|
||||
#define X86_SET_FSBASE 17 // netbsd
|
||||
|
||||
#define __NR___set_tcb 0x00000149
|
||||
#define __NR__lwp_setprivate 0x0000013d
|
||||
#define __NR_thread_fast_set_cthread_self 0x03000003
|
||||
|
||||
#define _TLSZ ((intptr_t)_tls_size)
|
||||
#define _TLDZ ((intptr_t)_tdata_size)
|
||||
#define _TIBZ sizeof(struct CosmoTib)
|
||||
|
||||
int sys_enable_tls();
|
||||
|
||||
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
|
||||
|
||||
__msabi extern typeof(TlsAlloc) *const __imp_TlsAlloc;
|
||||
|
||||
struct PosixThread _pthread_main;
|
||||
extern unsigned char __tls_mov_nt_rax[];
|
||||
extern unsigned char __tls_add_nt_rax[];
|
||||
|
@ -102,12 +74,12 @@ _Alignas(TLS_ALIGNMENT) static char __static_tls[5008];
|
|||
* arch_prctl() function. However, such programs might not be portable
|
||||
* and your `errno` variable also won't be thread safe anymore.
|
||||
*/
|
||||
privileged void __enable_tls(void) {
|
||||
void __enable_tls(void) {
|
||||
int tid;
|
||||
size_t siz;
|
||||
struct CosmoTib *tib;
|
||||
char *mem, *tls;
|
||||
siz = ROUNDUP(_TLSZ + _TIBZ, alignof(__static_tls));
|
||||
siz = ROUNDUP(_TLSZ + _TIBZ, _Alignof(__static_tls));
|
||||
if (siz <= sizeof(__static_tls)) {
|
||||
// if tls requirement is small then use the static tls block
|
||||
// which helps avoid a system call for apps with little tls
|
||||
|
@ -134,6 +106,8 @@ privileged void __enable_tls(void) {
|
|||
tib->tib_self = tib;
|
||||
tib->tib_self2 = tib;
|
||||
tib->tib_errno = __errno;
|
||||
tib->tib_strace = __strace;
|
||||
tib->tib_ftrace = __ftrace;
|
||||
tib->tib_pthread = (pthread_t)&_pthread_main;
|
||||
if (IsLinux()) {
|
||||
// gnu/systemd guarantees pid==tid for the main thread so we can
|
||||
|
@ -149,124 +123,10 @@ privileged void __enable_tls(void) {
|
|||
__repmovsb(tls, _tdata_start, _TLDZ);
|
||||
|
||||
// ask the operating system to change the x86 segment register
|
||||
int ax, dx;
|
||||
if (IsWindows()) {
|
||||
__tls_index = __imp_TlsAlloc();
|
||||
_npassert(0 <= __tls_index && __tls_index < 64);
|
||||
asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib));
|
||||
} else if (IsFreebsd()) {
|
||||
sys_enable_tls(AMD64_SET_FSBASE, tib);
|
||||
} else if (IsLinux()) {
|
||||
sys_enable_tls(ARCH_SET_FS, tib);
|
||||
} else if (IsNetbsd()) {
|
||||
// netbsd has sysarch(X86_SET_FSBASE) but we can't use that because
|
||||
// signal handlers will cause it to be reset due to not setting the
|
||||
// _mc_tlsbase field in struct mcontext_netbsd.
|
||||
sys_enable_tls(tib);
|
||||
} else if (IsOpenbsd()) {
|
||||
sys_enable_tls(tib);
|
||||
} else if (IsXnu()) {
|
||||
// thread_fast_set_cthread_self has a weird ABI
|
||||
int e = errno;
|
||||
sys_enable_tls((intptr_t)tib - 0x30);
|
||||
errno = e;
|
||||
} else {
|
||||
uint64_t val = (uint64_t)tib;
|
||||
asm volatile("wrmsr"
|
||||
: /* no outputs */
|
||||
: "c"(MSR_IA32_FS_BASE), "a"((uint32_t)val),
|
||||
"d"((uint32_t)(val >> 32)));
|
||||
}
|
||||
__set_tls(tib);
|
||||
|
||||
// We need to rewrite SysV _Thread_local code. You MUST use the
|
||||
// -mno-tls-direct-seg-refs flag which generates code like this
|
||||
//
|
||||
// 64 48 8b 0R4 25 00 00 00 00 mov %fs:0,%R
|
||||
// 64 48 03 0R4 25 00 00 00 00 add %fs:0,%R
|
||||
//
|
||||
// Which on Mac we can replace with this:
|
||||
//
|
||||
// 65 48 8b 0R4 25 30 00 00 00 mov %gs:0x30,%R
|
||||
//
|
||||
// Since we have no idea where the TLS instructions exist in the
|
||||
// binary, we need to disassemble the whole program image. This'll
|
||||
// potentially take a few milliseconds for some larger programs.
|
||||
//
|
||||
// We check `_tls_content` which is generated by the linker script
|
||||
// since it lets us determine ahead of time if _Thread_local vars
|
||||
// have actually been linked into this program.
|
||||
if ((intptr_t)_tls_content && (IsWindows() || IsXnu())) {
|
||||
int n;
|
||||
uint64_t w;
|
||||
sigset_t mask;
|
||||
unsigned m, dis;
|
||||
unsigned char *p;
|
||||
__morph_begin(&mask);
|
||||
|
||||
if (IsXnu()) {
|
||||
// Apple is quite straightforward to patch. We basically
|
||||
// just change the segment register, and the linear slot
|
||||
// address 0x30 was promised to us, according to Go team
|
||||
// https://github.com/golang/go/issues/23617
|
||||
dis = 0x30;
|
||||
} else {
|
||||
// MSVC __declspec(thread) generates binary code for this
|
||||
// %gs:0x1480 abi. So long as TlsAlloc() isn't called >64
|
||||
// times we should be good.
|
||||
dis = 0x1480 + __tls_index * 8;
|
||||
}
|
||||
|
||||
// iterate over modifiable code looking for 9 byte instruction
|
||||
// this would take 30 ms using xed to enable tls on python.com
|
||||
for (p = _ereal; p + 9 <= __privileged_start; p += n) {
|
||||
|
||||
// use sse to zoom zoom to fs register prefixes
|
||||
// that way it'll take 1 ms to morph python.com
|
||||
while (p + 9 + 16 <= __privileged_start) {
|
||||
if ((m = __builtin_ia32_pmovmskb128(
|
||||
*(xmm_t *)p == (xmm_t){0144, 0144, 0144, 0144, 0144, 0144,
|
||||
0144, 0144, 0144, 0144, 0144, 0144,
|
||||
0144, 0144, 0144, 0144}))) {
|
||||
m = __builtin_ctzll(m);
|
||||
p += m;
|
||||
break;
|
||||
} else {
|
||||
p += 16;
|
||||
}
|
||||
}
|
||||
|
||||
// we're checking for the following expression:
|
||||
// 0144 == p[0] && // %fs
|
||||
// 0110 == (p[1] & 0373) && // rex.w (and ignore rex.r)
|
||||
// (0213 == p[2] || // mov reg/mem → reg (word-sized)
|
||||
// 0003 == p[2]) && // add reg/mem → reg (word-sized)
|
||||
// 0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg
|
||||
// 0045 == p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32
|
||||
// 0000 == p[5] && // displacement (von Neumann endian)
|
||||
// 0000 == p[6] && // displacement
|
||||
// 0000 == p[7] && // displacement
|
||||
// 0000 == p[8] // displacement
|
||||
w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377");
|
||||
if ((w == READ64LE("\144\110\213\004\045\000\000\000") ||
|
||||
w == READ64LE("\144\110\003\004\045\000\000\000")) &&
|
||||
!p[8]) {
|
||||
|
||||
// now change the code
|
||||
p[0] = 0145; // change %fs to %gs
|
||||
p[5] = (dis & 0x000000ff) >> 000; // displacement
|
||||
p[6] = (dis & 0x0000ff00) >> 010; // displacement
|
||||
p[7] = (dis & 0x00ff0000) >> 020; // displacement
|
||||
p[8] = (dis & 0xff000000) >> 030; // displacement
|
||||
|
||||
// advance to the next instruction
|
||||
n = 9;
|
||||
} else {
|
||||
n = 1;
|
||||
}
|
||||
}
|
||||
|
||||
__morph_end(&mask);
|
||||
}
|
||||
// rewrite the executable tls opcodes in memory
|
||||
__morph_tls();
|
||||
|
||||
// we are now allowed to use tls
|
||||
__tls_enabled = true;
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "libc/calls/wincrash.internal.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/fmt/itoa.h"
|
||||
#include "libc/intrin/directmap.internal.h"
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
#include "libc/intrin/weaken.h"
|
||||
|
@ -46,7 +47,6 @@
|
|||
#include "libc/nt/runtime.h"
|
||||
#include "libc/nt/signals.h"
|
||||
#include "libc/nt/struct/ntexceptionpointers.h"
|
||||
#include "libc/runtime/directmap.internal.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/memtrack.internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
|
@ -61,6 +61,7 @@ STATIC_YOINK("_check_sigchld");
|
|||
|
||||
extern int64_t __wincrashearly;
|
||||
bool32 __onntconsoleevent_nt(uint32_t);
|
||||
void kmalloc_unlock(void);
|
||||
|
||||
static textwindows wontreturn void AbortFork(const char *func) {
|
||||
STRACE("fork() %s() failed %d", func, GetLastError());
|
||||
|
@ -259,19 +260,19 @@ textwindows void WinMainForked(void) {
|
|||
}
|
||||
|
||||
textwindows int sys_fork_nt(uint32_t dwCreationFlags) {
|
||||
bool ok;
|
||||
jmp_buf jb;
|
||||
uint32_t oldprot;
|
||||
char ok, threaded;
|
||||
char **args, **args2;
|
||||
struct CosmoTib *tib;
|
||||
char16_t pipename[64];
|
||||
bool needtls, threaded;
|
||||
int64_t reader, writer;
|
||||
struct NtStartupInfo startinfo;
|
||||
int i, n, pid, untrackpid, rc = -1;
|
||||
char *p, forkvar[6 + 21 + 1 + 21 + 1];
|
||||
struct NtProcessInformation procinfo;
|
||||
threaded = __threaded;
|
||||
needtls = __tls_enabled;
|
||||
tib = __tls_enabled ? __get_tls() : 0;
|
||||
if (!setjmp(jb)) {
|
||||
pid = untrackpid = __reservefd_unlocked(-1);
|
||||
reader = CreateNamedPipe(CreatePipeName(pipename),
|
||||
|
@ -293,7 +294,7 @@ textwindows int sys_fork_nt(uint32_t dwCreationFlags) {
|
|||
#ifdef SYSDEBUG
|
||||
// If --strace was passed to this program, then propagate it to the
|
||||
// forked process since the flag was removed by __intercept_flag
|
||||
if (__strace > 0) {
|
||||
if (strace_enabled(0) > 0) {
|
||||
for (n = 0; args[n];) ++n;
|
||||
args2 = alloca((n + 2) * sizeof(char *));
|
||||
for (i = 0; i < n; ++i) args2[i] = args[i];
|
||||
|
@ -345,8 +346,10 @@ textwindows int sys_fork_nt(uint32_t dwCreationFlags) {
|
|||
}
|
||||
} else {
|
||||
rc = 0;
|
||||
if (needtls) {
|
||||
__enable_tls();
|
||||
if (tib && _weaken(__set_tls) && _weaken(__morph_tls)) {
|
||||
_weaken(__set_tls)(tib);
|
||||
_weaken(__morph_tls)();
|
||||
__tls_enabled = true;
|
||||
}
|
||||
if (threaded && !__threaded && _weaken(__enable_threads)) {
|
||||
_weaken(__enable_threads)();
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/thread/tls.h"
|
||||
|
||||
/**
|
||||
* Enables plaintext function tracing if `--ftrace` flag is passed.
|
||||
|
@ -35,7 +36,7 @@
|
|||
textstartup int ftrace_init(void) {
|
||||
if (__intercept_flag(&__argc, __argv, "--ftrace")) {
|
||||
ftrace_install();
|
||||
++__ftrace;
|
||||
ftrace_enabled(+1);
|
||||
}
|
||||
return __argc;
|
||||
}
|
||||
|
|
|
@ -72,10 +72,13 @@ static privileged inline int GetNestingLevel(struct CosmoFtrace *ft,
|
|||
*/
|
||||
privileged void ftracer(void) {
|
||||
long stackuse;
|
||||
struct CosmoFtrace *ft;
|
||||
struct CosmoTib *tib;
|
||||
struct StackFrame *sf;
|
||||
struct CosmoFtrace *ft;
|
||||
if (__tls_enabled) {
|
||||
ft = &__get_tls_privileged()->tib_ftrace;
|
||||
tib = __get_tls_privileged();
|
||||
if (tib->tib_ftrace <= 0) return;
|
||||
ft = &tib->tib_ftracer;
|
||||
} else {
|
||||
ft = &g_ftrace;
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ extern unsigned char _tls_size[];
|
|||
extern unsigned char _tls_content[];
|
||||
|
||||
void _init(void) hidden;
|
||||
void __morph_tls(void);
|
||||
void __enable_tls(void);
|
||||
void __enable_threads(void) hidden;
|
||||
void *__cxa_finalize(void *) hidden;
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "libc/intrin/bits.h"
|
||||
#include "libc/intrin/bsr.h"
|
||||
#include "libc/intrin/describeflags.internal.h"
|
||||
#include "libc/intrin/directmap.internal.h"
|
||||
#include "libc/intrin/likely.h"
|
||||
#include "libc/intrin/safemacros.internal.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
|
@ -39,7 +40,6 @@
|
|||
#include "libc/nt/process.h"
|
||||
#include "libc/nt/runtime.h"
|
||||
#include "libc/nt/struct/processmemorycounters.h"
|
||||
#include "libc/runtime/directmap.internal.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/memtrack.internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
|
|
118
libc/runtime/morph_tls.c
Normal file
118
libc/runtime/morph_tls.c
Normal file
|
@ -0,0 +1,118 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "ape/sections.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/intrin/bits.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/morph.h"
|
||||
#include "libc/thread/tls.h"
|
||||
|
||||
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
|
||||
|
||||
/**
 * Patches thread-local storage instructions in the program image.
 *
 * This is a no-op unless `_tls_content` is nonzero and the host is
 * Windows or XNU. Otherwise the bytes from `_ereal` up to
 * `__privileged_start` are scanned for the 9-byte `mov %fs:0,%R` and
 * `add %fs:0,%R` encodings. Each match has its segment prefix changed
 * to `%gs` and its 32-bit displacement overwritten with 0x30 on XNU or
 * `0x1480 + __tls_index * 8` on Windows. The whole scan is bracketed
 * by __morph_begin() and __morph_end().
 */
privileged void __morph_tls(void) {
|
||||
// We need to rewrite SysV _Thread_local code. You MUST use the
|
||||
// -mno-tls-direct-seg-refs flag which generates code like this
|
||||
//
|
||||
// 64 48 8b 0R4 25 00 00 00 00 mov %fs:0,%R
|
||||
// 64 48 03 0R4 25 00 00 00 00 add %fs:0,%R
|
||||
//
|
||||
// Which on Mac we can replace with this:
|
||||
//
|
||||
// 65 48 8b 0R4 25 30 00 00 00 mov %gs:0x30,%R
|
||||
//
|
||||
// Since we have no idea where the TLS instructions exist in the
|
||||
// binary, we need to disassemble the whole program image. This'll
|
||||
// potentially take a few milliseconds for some larger programs.
|
||||
//
|
||||
// We check `_tls_content` which is generated by the linker script
|
||||
// since it lets us determine ahead of time if _Thread_local vars
|
||||
// have actually been linked into this program.
|
||||
if ((intptr_t)_tls_content && (IsWindows() || IsXnu())) {
|
||||
int n;
|
||||
uint64_t w;
|
||||
sigset_t mask;
|
||||
unsigned m, dis;
|
||||
unsigned char *p;
|
||||
__morph_begin(&mask);
|
||||
|
||||
if (IsXnu()) {
|
||||
// Apple is quite straightforward to patch. We basically
|
||||
// just change the segment register, and the linear slot
|
||||
// address 0x30 was promised to us, according to Go team
|
||||
// https://github.com/golang/go/issues/23617
|
||||
dis = 0x30;
|
||||
} else {
|
||||
// MSVC __declspec(thread) generates binary code for this
|
||||
// %gs:0x1480 abi. So long as TlsAlloc() isn't called >64
|
||||
// times we should be good.
|
||||
dis = 0x1480 + __tls_index * 8;
|
||||
}
|
||||
|
||||
// iterate over modifiable code looking for 9 byte instruction
|
||||
// this would take 30 ms using xed to enable tls on python.com
|
||||
for (p = _ereal; p + 9 <= __privileged_start; p += n) {
|
||||
|
||||
// use sse to zoom zoom to fs register prefixes
|
||||
// that way it'll take 1 ms to morph python.com
|
||||
// (0144 octal is 0x64, the %fs prefix byte; the loop stops at the
|
||||
// first such byte within each 16-byte window)
|
||||
while (p + 9 + 16 <= __privileged_start) {
|
||||
if ((m = __builtin_ia32_pmovmskb128(
|
||||
*(xmm_t *)p == (xmm_t){0144, 0144, 0144, 0144, 0144, 0144,
|
||||
0144, 0144, 0144, 0144, 0144, 0144,
|
||||
0144, 0144, 0144, 0144}))) {
|
||||
m = __builtin_ctzll(m);
|
||||
p += m;
|
||||
break;
|
||||
} else {
|
||||
p += 16;
|
||||
}
|
||||
}
|
||||
|
||||
// we're checking for the following expression:
|
||||
// 0144 == p[0] && // %fs
|
||||
// 0110 == (p[1] & 0373) && // rex.w (and ignore rex.r)
|
||||
// (0213 == p[2] || // mov reg/mem → reg (word-sized)
|
||||
// 0003 == p[2]) && // add reg/mem → reg (word-sized)
|
||||
// 0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg
|
||||
// 0045 == p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32
|
||||
// 0000 == p[5] && // displacement (von Neumann endian)
|
||||
// 0000 == p[6] && // displacement
|
||||
// 0000 == p[7] && // displacement
|
||||
// 0000 == p[8] // displacement
|
||||
w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377");
|
||||
if ((w == READ64LE("\144\110\213\004\045\000\000\000") ||
|
||||
w == READ64LE("\144\110\003\004\045\000\000\000")) &&
|
||||
// the masked 64-bit compare covers p[0..7]; the ninth byte is
|
||||
// the last displacement byte and is tested separately
|
||||
!p[8]) {
|
||||
|
||||
// now change the code
|
||||
p[0] = 0145; // change %fs to %gs
|
||||
p[5] = (dis & 0x000000ff) >> 000; // displacement
|
||||
p[6] = (dis & 0x0000ff00) >> 010; // displacement
|
||||
p[7] = (dis & 0x00ff0000) >> 020; // displacement
|
||||
p[8] = (dis & 0xff000000) >> 030; // displacement
|
||||
|
||||
// advance to the next instruction
|
||||
n = 9;
|
||||
} else {
|
||||
n = 1;
|
||||
}
|
||||
}
|
||||
|
||||
__morph_end(&mask);
|
||||
}
|
||||
}
|
|
@ -16,8 +16,8 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/intrin/directmap.internal.h"
|
||||
#include "libc/nt/memory.h"
|
||||
#include "libc/runtime/directmap.internal.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/memtrack.internal.h"
|
||||
|
||||
|
|
|
@ -21,12 +21,12 @@
|
|||
#include "libc/dce.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/intrin/describeflags.internal.h"
|
||||
#include "libc/intrin/directmap.internal.h"
|
||||
#include "libc/intrin/likely.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
#include "libc/intrin/weaken.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nt/runtime.h"
|
||||
#include "libc/runtime/directmap.internal.h"
|
||||
#include "libc/runtime/memtrack.internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/sysv/consts/map.h"
|
||||
|
|
|
@ -22,13 +22,13 @@
|
|||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/asan.internal.h"
|
||||
#include "libc/intrin/directmap.internal.h"
|
||||
#include "libc/intrin/likely.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
#include "libc/log/backtrace.internal.h"
|
||||
#include "libc/log/libfatal.internal.h"
|
||||
#include "libc/log/log.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/runtime/directmap.internal.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/memtrack.internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
|
|
|
@ -176,8 +176,8 @@ textstartup void __printargs(const char *prologue) {
|
|||
|
||||
if (!PLEDGED(STDIO)) return;
|
||||
|
||||
--__ftrace;
|
||||
--__strace;
|
||||
ftrace_enabled(-1);
|
||||
strace_enabled(-1);
|
||||
e = errno;
|
||||
|
||||
PRINT("");
|
||||
|
@ -618,7 +618,7 @@ textstartup void __printargs(const char *prologue) {
|
|||
}
|
||||
|
||||
PRINT("");
|
||||
++__strace;
|
||||
++__ftrace;
|
||||
strace_enabled(+1);
|
||||
ftrace_enabled(+1);
|
||||
errno = e;
|
||||
}
|
||||
|
|
|
@ -16,8 +16,8 @@ extern char **__envp; /* CRT */
|
|||
extern unsigned long *__auxv; /* CRT */
|
||||
extern intptr_t __oldstack; /* CRT */
|
||||
extern uint64_t __nosync; /* SYS */
|
||||
extern _Atomic(int) __ftrace; /* SYS */
|
||||
extern _Atomic(int) __strace; /* SYS */
|
||||
extern int __strace; /* SYS */
|
||||
extern int __ftrace; /* SYS */
|
||||
extern char *program_invocation_name; /* RII */
|
||||
extern char *program_invocation_short_name; /* RII */
|
||||
extern uint64_t __syscount; /* RII */
|
||||
|
@ -93,6 +93,8 @@ void _weakfree(void *);
|
|||
void free_s(void *) paramsnonnull() libcesque;
|
||||
int OpenExecutable(void);
|
||||
int ftrace_install(void);
|
||||
int ftrace_enabled(int);
|
||||
int strace_enabled(int);
|
||||
long GetResourceLimit(int);
|
||||
long GetMaxFd(void);
|
||||
char *GetProgramExecutableName(void);
|
||||
|
|
59
libc/runtime/set_tls.c
Normal file
59
libc/runtime/set_tls.c
Normal file
|
@ -0,0 +1,59 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/nexgen32e/msr.h"
|
||||
#include "libc/nt/thread.h"
|
||||
#include "libc/thread/tls.h"
|
||||
|
||||
int sys_set_tls();
|
||||
|
||||
void __set_tls(struct CosmoTib *tib) {
|
||||
// ask the operating system to change the x86 segment register
|
||||
int ax, dx;
|
||||
if (IsWindows()) {
|
||||
__tls_index = TlsAlloc();
|
||||
_npassert(0 <= __tls_index && __tls_index < 64);
|
||||
asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib));
|
||||
} else if (IsFreebsd()) {
|
||||
sys_set_tls(129 /*AMD64_SET_FSBASE*/, tib);
|
||||
} else if (IsLinux()) {
|
||||
sys_set_tls(ARCH_SET_FS, tib);
|
||||
} else if (IsNetbsd()) {
|
||||
// netbsd has sysarch(X86_SET_FSBASE) but we can't use that because
|
||||
// signal handlers will cause it to be reset due to not setting the
|
||||
// _mc_tlsbase field in struct mcontext_netbsd.
|
||||
sys_set_tls(tib);
|
||||
} else if (IsOpenbsd()) {
|
||||
sys_set_tls(tib);
|
||||
} else if (IsXnu()) {
|
||||
// thread_fast_set_cthread_self has a weird ABI
|
||||
int e = errno;
|
||||
sys_set_tls((intptr_t)tib - 0x30);
|
||||
errno = e;
|
||||
} else {
|
||||
uint64_t val = (uint64_t)tib;
|
||||
asm volatile("wrmsr"
|
||||
: /* no outputs */
|
||||
: "c"(MSR_IA32_FS_BASE), "a"((uint32_t)val),
|
||||
"d"((uint32_t)(val >> 32)));
|
||||
}
|
||||
}
|
|
@ -31,7 +31,7 @@ textstartup int __strace_init(int argc, char **argv, char **envp, long *auxv) {
|
|||
/* asan isn't initialized yet at runlevel 300 */
|
||||
if (__intercept_flag(&argc, argv, "--strace") ||
|
||||
__atoul(nulltoempty(__getenv(envp, "STRACE")))) {
|
||||
atomic_store_explicit(&__strace, 1, memory_order_relaxed);
|
||||
strace_enabled(+1);
|
||||
}
|
||||
return (__argc = argc);
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#include "libc/runtime/symbols.internal.h"
|
||||
|
||||
void __init_symbols(void) {
|
||||
if (__strace || (IsAsan() && _weaken(__die))) {
|
||||
if (__strace > 0 || (IsAsan() && _weaken(__die))) {
|
||||
GetSymbolTable();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,27 +19,15 @@
|
|||
#include "libc/calls/state.internal.h"
|
||||
#include "libc/calls/syscall_support-nt.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/elf/pf2prot.internal.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/fmt/fmt.h"
|
||||
#include "libc/intrin/bits.h"
|
||||
#include "libc/intrin/describeflags.internal.h"
|
||||
#include "libc/intrin/nomultics.internal.h"
|
||||
#include "libc/intrin/pushpop.h"
|
||||
#include "libc/intrin/strace.internal.h"
|
||||
#include "libc/intrin/weaken.h"
|
||||
#include "libc/log/libfatal.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/nt2sysv.h"
|
||||
#include "libc/nexgen32e/rdtsc.h"
|
||||
#include "libc/nt/console.h"
|
||||
#include "libc/nt/enum/consolemodeflags.h"
|
||||
#include "libc/nt/enum/filemapflags.h"
|
||||
#include "libc/nt/enum/filetype.h"
|
||||
#include "libc/nt/enum/loadlibrarysearch.h"
|
||||
#include "libc/nt/enum/pageflags.h"
|
||||
#include "libc/nt/enum/version.h"
|
||||
#include "libc/nt/files.h"
|
||||
#include "libc/nt/memory.h"
|
||||
#include "libc/nt/pedef.internal.h"
|
||||
#include "libc/nt/process.h"
|
||||
|
@ -47,16 +35,11 @@
|
|||
#include "libc/nt/signals.h"
|
||||
#include "libc/nt/struct/ntexceptionpointers.h"
|
||||
#include "libc/nt/struct/teb.h"
|
||||
#include "libc/nt/synchronization.h"
|
||||
#include "libc/nt/thunk/msabi.h"
|
||||
#include "libc/runtime/directmap.internal.h"
|
||||
#include "libc/runtime/internal.h"
|
||||
#include "libc/runtime/memtrack.internal.h"
|
||||
#include "libc/runtime/runtime.h"
|
||||
#include "libc/runtime/stack.h"
|
||||
#include "libc/runtime/winargs.internal.h"
|
||||
#include "libc/sock/internal.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/str/utf16.h"
|
||||
|
||||
#if IsTiny()
|
||||
__msabi extern typeof(CreateFileMapping) *const __imp_CreateFileMappingW;
|
||||
|
@ -79,7 +62,6 @@ __msabi extern typeof(VirtualProtect) *const __imp_VirtualProtect;
|
|||
* TODO: How can we ensure we never overlap with KERNEL32.DLL?
|
||||
*/
|
||||
|
||||
extern uint32_t __winmainpid;
|
||||
extern int64_t __wincrashearly;
|
||||
extern const char kConsoleHandles[3];
|
||||
|
||||
|
@ -169,7 +151,6 @@ __msabi static textwindows wontreturn void WinMainNew(const char16_t *cmdline) {
|
|||
version = NtGetPeb()->OSMajorVersion;
|
||||
__oldstack = (intptr_t)__builtin_frame_address(0);
|
||||
if ((intptr_t)v_ntsubsystem == kNtImageSubsystemWindowsCui && version >= 10) {
|
||||
__winmainpid = __pid;
|
||||
rc = SetConsoleCP(kNtCpUtf8);
|
||||
NTTRACE("SetConsoleCP(kNtCpUtf8) → %hhhd", rc);
|
||||
rc = SetConsoleOutputCP(kNtCpUtf8);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue