mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-27 13:00:28 +00:00
Always initialize thread local storage
We had previously not enabled TLS in MODE=tiny in order to keep the smallest example programs (e.g. life.com) just 16kb in size. But doing that was error-prone, so TLS is now always enabled; this change uses hacks to ensure that doing so won't increase life.com's size. This change also fixes a bug on NetBSD, where signal handlers would break thread local storage if SA_SIGINFO was being used. This looks like it might be a bug in NetBSD, but it has a simple workaround.
This commit is contained in:
parent
057e8f5b54
commit
69f4152f38
33 changed files with 174 additions and 123 deletions
|
@ -199,7 +199,7 @@ int arch_prctl(int code, int64_t addr) {
|
|||
case METAL:
|
||||
return arch_prctl_msr(code, addr);
|
||||
case FREEBSD:
|
||||
/* claims support but it appears not */
|
||||
// TODO(jart): this should use sysarch()
|
||||
return arch_prctl_freebsd(code, addr);
|
||||
case OPENBSD:
|
||||
return arch_prctl_openbsd(code, addr);
|
||||
|
|
|
@ -565,21 +565,9 @@ static int CloneLinux(int (*func)(void *arg, int tid), char *stk, size_t stksz,
|
|||
* either terminated or has finished using its stack memory
|
||||
*
|
||||
* - `CLONE_SETTLS` is needed if you intend to specify the `tls`
|
||||
* argument, which provides a fast-path solution for changing the
|
||||
* appropriate TLS segment register within the child thread. The
|
||||
* child thread may then obtain a reference to the TIB address you
|
||||
* supplied, by calling __get_tls(). Your C library holds certain
|
||||
* expectations about the layout of your Thread Information Block
|
||||
* (TIB), which are all documented by __initialize_tls(). That
|
||||
* function can be used to initialize the first positive 64 bytes
|
||||
* of your TLS allocation, which is the memory Cosmopolitan Libc
|
||||
* wants for itself (and negative addresses are reserved by the
|
||||
* GNU Linker). Using this flag will transition the C runtime to a
|
||||
* `__tls_enabled` state automatically. If you use TLS for just
|
||||
* one thread, then you must be specify TLS for ALL THREADS. It's
|
||||
* a good idea to do that since TLS can offer considerable (i.e.
|
||||
* multiple orders of a magnitude) performance improvement for
|
||||
* TID-dependent C library services, e.g. recursive mutexes.
|
||||
* argument, which after thread creation may be accessed using
|
||||
* __get_tls(). Doing this means that `errno`, gettid(), etc.
|
||||
* correctly work. Caveat emptor if you choose not to do this.
|
||||
*
|
||||
* @param arg is passed as an argument to `func` in the child thread
|
||||
* @param tls may be used to set the thread local storage segment;
|
||||
|
@ -594,8 +582,9 @@ int clone(void *func, void *stk, size_t stksz, int flags, void *arg, int *ptid,
|
|||
int rc;
|
||||
struct CloneArgs *wt;
|
||||
|
||||
if (flags & CLONE_SETTLS) __enable_tls();
|
||||
if (flags & CLONE_THREAD) __enable_threads();
|
||||
if (flags & CLONE_THREAD) {
|
||||
__enable_threads();
|
||||
}
|
||||
|
||||
if (!func) {
|
||||
rc = einval();
|
||||
|
|
|
@ -76,22 +76,14 @@ cosmo: push %rbp
|
|||
ret
|
||||
.endfn cosmo,weak
|
||||
|
||||
#if !IsTiny()
|
||||
// Enable TLS early if _Thread_local is used
|
||||
// In MODE=tiny you may need to explicitly call __enable_tls()
|
||||
// Otherwise this would bloat life.com from 16kb → 32kb D:
|
||||
// Enables Thread Local Storage.
|
||||
.init.start 304,_init_tls
|
||||
mov $_tls_content,%eax
|
||||
test %eax,%eax
|
||||
jz 1f
|
||||
push %rdi
|
||||
push %rsi
|
||||
call __enable_tls
|
||||
pop %rsi
|
||||
pop %rdi
|
||||
jz 1f
|
||||
1: .init.end 304,_init_tls
|
||||
#endif
|
||||
.init.end 304,_init_tls
|
||||
|
||||
#if !IsTiny()
|
||||
// Creates deterministically addressed stack we can use
|
||||
|
|
|
@ -16,12 +16,16 @@
|
|||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/bits.h"
|
||||
#include "libc/bits/weaken.h"
|
||||
#include "libc/calls/calls.h"
|
||||
#include "libc/calls/strace.internal.h"
|
||||
#include "libc/calls/syscall-sysv.internal.h"
|
||||
#include "libc/dce.h"
|
||||
#include "libc/errno.h"
|
||||
#include "libc/intrin/kprintf.h"
|
||||
#include "libc/log/libfatal.internal.h"
|
||||
#include "libc/macros.internal.h"
|
||||
#include "libc/nexgen32e/threaded.h"
|
||||
#include "libc/nt/thread.h"
|
||||
|
@ -52,12 +56,35 @@ __msabi extern typeof(TlsAlloc) *const __imp_TlsAlloc;
|
|||
|
||||
extern unsigned char __tls_mov_nt_rax[];
|
||||
extern unsigned char __tls_add_nt_rax[];
|
||||
_Alignas(long) static char __static_tls[5008];
|
||||
|
||||
/**
|
||||
* Enables thread local storage.
|
||||
*
|
||||
* This function is always called by the core runtime to guarantee TLS
|
||||
* is always available to your program. You must build your code using
|
||||
* -mno-tls-direct-seg-refs if you want to use _Thread_local.
|
||||
*
|
||||
* You can use __get_tls() to get the linear address of your tib. When
|
||||
* accessing TLS via privileged code you must use __get_tls_privileged
|
||||
* because we need code morphing to support The New Technology and XNU
|
||||
*
|
||||
* On XNU and The New Technology, this function imposes 1ms of latency
|
||||
* during startup for larger binaries like Python.
|
||||
*
|
||||
* If you don't want TLS and you're sure you're not using it, then you
|
||||
* can disable it as follows:
|
||||
*
|
||||
* int main() {
|
||||
* __tls_enabled = false;
|
||||
* // do stuff
|
||||
* }
|
||||
*
|
||||
* This is useful if you want to wrestle back control of %fs using the
|
||||
* arch_prctl() function. However, such programs might not be portable
|
||||
* and your `errno` variable also won't be thread safe anymore.
|
||||
*/
|
||||
privileged void __enable_tls(void) {
|
||||
if (__tls_enabled) return;
|
||||
STRACE("__enable_tls()");
|
||||
|
||||
// allocate tls memory for main process
|
||||
|
@ -74,20 +101,42 @@ privileged void __enable_tls(void) {
|
|||
size_t siz;
|
||||
cthread_t tib;
|
||||
char *mem, *tls;
|
||||
siz = ROUNDUP(_TLSZ + _TIBZ, FRAMESIZE);
|
||||
mem = _mapanon(siz);
|
||||
siz = ROUNDUP(_TLSZ + _TIBZ, alignof(__static_tls));
|
||||
if (siz <= sizeof(__static_tls)) {
|
||||
// if tls requirement is small then use the static tls block
|
||||
// which helps avoid a system call for apps with little tls
|
||||
// this is crucial to keeping life.com 16 kilobytes in size!
|
||||
_Static_assert(alignof(__static_tls) >= alignof(cthread_t));
|
||||
mem = __static_tls;
|
||||
} else {
|
||||
// if this binary needs a hefty tls block then we'll bank on
|
||||
// malloc() being linked, which links _mapanon(). otherwise
|
||||
// if you exceed this, you need to STATIC_YOINK("_mapanon").
|
||||
// please note that it's probably too early to call calloc()
|
||||
assert(weaken(_mapanon));
|
||||
siz = ROUNDUP(siz, FRAMESIZE);
|
||||
mem = weaken(_mapanon)(siz);
|
||||
assert(mem);
|
||||
}
|
||||
tib = (cthread_t)(mem + siz - _TIBZ);
|
||||
tls = mem + siz - _TIBZ - _TLSZ;
|
||||
tib->self = tib;
|
||||
tib->self2 = tib;
|
||||
tib->err = __errno;
|
||||
tib->tid = sys_gettid();
|
||||
memmove(tls, _tdata_start, _TLDZ);
|
||||
if (IsLinux()) {
|
||||
// gnu/systemd guarantees pid==tid for the main thread so we can
|
||||
// avoid issuing a superfluous system call at startup in program
|
||||
tib->tid = __pid;
|
||||
} else {
|
||||
tib->tid = sys_gettid();
|
||||
}
|
||||
__repmovsb(tls, _tdata_start, _TLDZ);
|
||||
|
||||
// ask the operating system to change the x86 segment register
|
||||
int ax, dx;
|
||||
if (IsWindows()) {
|
||||
__tls_index = __imp_TlsAlloc();
|
||||
assert(0 <= __tls_index && __tls_index < 64);
|
||||
asm("mov\t%1,%%gs:%0" : "=m"(*((long *)0x1480 + __tls_index)) : "r"(tib));
|
||||
} else if (IsFreebsd()) {
|
||||
asm volatile("syscall"
|
||||
|
@ -95,9 +144,12 @@ privileged void __enable_tls(void) {
|
|||
: "0"(__NR_sysarch), "D"(AMD64_SET_FSBASE), "S"(tib)
|
||||
: "rcx", "r11", "memory", "cc");
|
||||
} else if (IsNetbsd()) {
|
||||
// netbsd has sysarch(X86_SET_FSBASE) but we can't use that because
|
||||
// signal handlers will cause it to be reset due to not setting the
|
||||
// _mc_tlsbase field in struct mcontext_netbsd.
|
||||
asm volatile("syscall"
|
||||
: "=a"(ax), "=d"(dx)
|
||||
: "0"(__NR_sysarch), "D"(X86_SET_FSBASE), "S"(tib)
|
||||
: "0"(__NR__lwp_setprivate), "D"(tib)
|
||||
: "rcx", "r11", "memory", "cc");
|
||||
} else if (IsXnu()) {
|
||||
asm volatile("syscall"
|
||||
|
@ -179,7 +231,7 @@ privileged void __enable_tls(void) {
|
|||
}
|
||||
|
||||
// we're checking for the following expression:
|
||||
// 0144 == p[0] && // fs
|
||||
// 0144 == p[0] && // %fs
|
||||
// 0110 == p[1] && // rex.w (64-bit operand size)
|
||||
// (0213 == p[2] || // mov reg/mem → reg (word-sized)
|
||||
// 0003 == p[2]) && // add reg/mem → reg (word-sized)
|
||||
|
@ -195,7 +247,7 @@ privileged void __enable_tls(void) {
|
|||
!p[8]) {
|
||||
|
||||
// now change the code
|
||||
p[0] = 0145; // this changes gs segment to fs segment
|
||||
p[0] = 0145; // change %fs to %gs
|
||||
p[5] = (dis & 0x000000ff) >> 000; // displacement
|
||||
p[6] = (dis & 0x0000ff00) >> 010; // displacement
|
||||
p[7] = (dis & 0x00ff0000) >> 020; // displacement
|
||||
|
|
|
@ -92,7 +92,6 @@ privileged void ftracer(void) {
|
|||
|
||||
textstartup int ftrace_install(void) {
|
||||
if (GetSymbolTable()) {
|
||||
__enable_tls();
|
||||
g_stackdigs = LengthInt64Thousands(GetStackSize());
|
||||
return __hook(ftrace_hook, GetSymbolTable());
|
||||
} else {
|
||||
|
|
|
@ -27,7 +27,7 @@ extern unsigned char _tls_size[];
|
|||
extern unsigned char _tls_content[];
|
||||
|
||||
void _init(void) hidden;
|
||||
void __enable_tls(void) hidden;
|
||||
void __enable_tls(void);
|
||||
void __enable_threads(void) hidden;
|
||||
void __restorewintty(void) hidden;
|
||||
void *__cxa_finalize(void *) hidden;
|
||||
|
|
|
@ -55,15 +55,16 @@
|
|||
*
|
||||
* That is performed automatically for unit test executables.
|
||||
*
|
||||
* @return memory map address on success, or null w/ errrno
|
||||
* @return memory map address on success, or null w/ errno
|
||||
*/
|
||||
void *_mapanon(size_t size) {
|
||||
/* asan runtime depends on this function */
|
||||
void *m;
|
||||
m = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (m == MAP_FAILED && weaken(__oom_hook)) {
|
||||
weaken(__oom_hook)(size);
|
||||
return 0;
|
||||
if (m != MAP_FAILED) {
|
||||
return m;
|
||||
}
|
||||
return m;
|
||||
if (errno == ENOMEM && weaken(__oom_hook)) {
|
||||
weaken(__oom_hook)(size);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#define ShouldUseMsabiAttribute() 1
|
||||
#include "libc/assert.h"
|
||||
#include "libc/bits/asmflag.h"
|
||||
#include "libc/calls/internal.h"
|
||||
#include "libc/calls/strace.internal.h"
|
||||
|
@ -58,10 +59,28 @@ static privileged void __morph_mprotect(void *addr, size_t size, int prot,
|
|||
* @return 0 on success, or -1 w/ errno
|
||||
*/
|
||||
privileged void __morph_begin(void) {
|
||||
int ax;
|
||||
bool cf;
|
||||
intptr_t dx;
|
||||
sigset_t ss = {{-1, -1}};
|
||||
STRACE("__morph_begin()");
|
||||
if (!IsWindows()) {
|
||||
sys_sigprocmask(SIG_BLOCK, &ss, &oldss);
|
||||
if (!IsOpenbsd()) {
|
||||
asm volatile("mov\t$8,%%r10d\n\t"
|
||||
"syscall"
|
||||
: "=a"(ax), "=d"(dx)
|
||||
: "0"(__NR_sigprocmask), "D"(SIG_BLOCK), "S"(&ss),
|
||||
"1"(&oldss)
|
||||
: "rcx", "r10", "r11", "memory", "cc");
|
||||
assert(!ax);
|
||||
} else {
|
||||
asm volatile(CFLAG_ASM("syscall")
|
||||
: CFLAG_CONSTRAINT(cf), "=a"(ax), "=d"(dx)
|
||||
: "1"(__NR_sigprocmask), "D"(SIG_BLOCK), "S"(-1u)
|
||||
: "rcx", "r11", "memory");
|
||||
oldss.__bits[0] = ax & 0xffffffff;
|
||||
assert(!cf);
|
||||
}
|
||||
}
|
||||
__morph_mprotect(_base, __privileged_addr - _base, PROT_READ | PROT_WRITE,
|
||||
kNtPageWritecopy);
|
||||
|
@ -71,10 +90,28 @@ privileged void __morph_begin(void) {
|
|||
* Ends code morphing executable.
|
||||
*/
|
||||
privileged void __morph_end(void) {
|
||||
int ax;
|
||||
long dx;
|
||||
bool cf;
|
||||
__morph_mprotect(_base, __privileged_addr - _base, PROT_READ | PROT_EXEC,
|
||||
kNtPageExecuteRead);
|
||||
if (!IsWindows()) {
|
||||
sys_sigprocmask(SIG_SETMASK, &oldss, 0);
|
||||
if (!IsOpenbsd()) {
|
||||
asm volatile("mov\t$8,%%r10d\n\t"
|
||||
"syscall"
|
||||
: "=a"(ax), "=d"(dx)
|
||||
: "0"(__NR_sigprocmask), "D"(SIG_SETMASK), "S"(&oldss),
|
||||
"1"(0)
|
||||
: "rcx", "r10", "r11", "memory", "cc");
|
||||
assert(!ax);
|
||||
} else {
|
||||
asm volatile(CFLAG_ASM("syscall")
|
||||
: CFLAG_CONSTRAINT(cf), "=a"(ax), "=d"(dx)
|
||||
: "1"(__NR_sigprocmask), "D"(SIG_SETMASK),
|
||||
"S"(oldss.__bits[0])
|
||||
: "rcx", "r11", "memory");
|
||||
assert(!cf);
|
||||
}
|
||||
}
|
||||
STRACE("__morph_end()");
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue