diff --git a/ape/aarch64.lds b/ape/aarch64.lds index cec91ae89..57b22b4b5 100644 --- a/ape/aarch64.lds +++ b/ape/aarch64.lds @@ -293,7 +293,6 @@ _tdata_size = _tdata_end - _tdata_start; _tbss_size = _tbss_end - _tbss_start; _tbss_offset = _tbss_start - _tdata_start; _tls_content = (_tdata_end - _tdata_start) + (_tbss_end - _tbss_start); -_tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)); - -ASSERT(ALIGNOF(.tdata) <= TLS_ALIGNMENT && ALIGNOF(.tbss) <= TLS_ALIGNMENT, - "_Thread_local _Alignof can't exceed TLS_ALIGNMENT"); +_tdata_align = ALIGNOF(.tdata); +_tbss_align = ALIGNOF(.tbss); +_tls_align = MAX(TLS_ALIGNMENT, MAX(ALIGNOF(.tdata), ALIGNOF(.tbss))); diff --git a/ape/ape.lds b/ape/ape.lds index 9b1d40e5a..eaaf3ed59 100644 --- a/ape/ape.lds +++ b/ape/ape.lds @@ -553,7 +553,9 @@ _tdata_size = _tdata_end - _tdata_start; _tbss_size = _tbss_end - _tbss_start; _tbss_offset = _tbss_start - _tdata_start; _tls_content = (_tdata_end - _tdata_start) + (_tbss_end - _tbss_start); -_tls_align = 1; +_tdata_align = ALIGNOF(.tdata); +_tbss_align = ALIGNOF(.tbss); +_tls_align = MAX(TLS_ALIGNMENT, MAX(ALIGNOF(.tdata), ALIGNOF(.tbss))); ape_cod_offset = 0; ape_cod_vaddr = ADDR(.head); diff --git a/ape/sections.internal.h b/ape/sections.internal.h index 6bc8cc312..75e544c46 100644 --- a/ape/sections.internal.h +++ b/ape/sections.internal.h @@ -16,6 +16,8 @@ extern unsigned char _tdata_end[] __attribute__((__weak__)); extern unsigned char _tbss_start[] __attribute__((__weak__)); extern unsigned char _tbss_end[] __attribute__((__weak__)); extern unsigned char _tls_align[] __attribute__((__weak__)); +extern unsigned char _tdata_align[] __attribute__((__weak__)); +extern unsigned char _tbss_align[] __attribute__((__weak__)); extern unsigned char __test_start[] __attribute__((__weak__)); extern unsigned char __ro[] __attribute__((__weak__)); extern unsigned char __data_start[] __attribute__((__weak__)); diff --git a/libc/calls/sigaction.c b/libc/calls/sigaction.c index 44536bd2b..82a534970 100644 --- a/libc/calls/sigaction.c +++ b/libc/calls/sigaction.c @@ -255,7 +255,7 @@ static int __sigaction(int sig, const struct sigaction *act, // xnu silicon claims to support sa_resethand but it does nothing // this can be tested, since it clears the bit from flags as well if (!rc && oldact && - (((struct sigaction_silicon *)ap)->sa_flags & SA_RESETHAND)) { + (((struct sigaction_silicon *)oldact)->sa_flags & SA_RESETHAND)) { ((struct sigaction_silicon *)oldact)->sa_flags |= SA_RESETHAND; } } diff --git a/libc/runtime/enable_tls.c b/libc/runtime/enable_tls.c index 2be328cc1..c55f25aad 100644 --- a/libc/runtime/enable_tls.c +++ b/libc/runtime/enable_tls.c @@ -112,10 +112,6 @@ static unsigned long ParseMask(const char *str) { * and your `errno` variable also won't be thread safe anymore. */ textstartup void __enable_tls(void) { - int tid; - size_t siz; - char *mem, *tls; - struct CosmoTib *tib; // Here's the layout we're currently using: // @@ -138,7 +134,8 @@ textstartup void __enable_tls(void) { #ifdef __x86_64__ - siz = ROUNDUP(I(_tls_size) + sizeof(*tib), TLS_ALIGNMENT); + char *mem; + size_t siz = ROUNDUP(I(_tls_size) + sizeof(struct CosmoTib), TLS_ALIGNMENT); if (siz <= sizeof(__static_tls)) { // if tls requirement is small then use the static tls block // which helps avoid a system call for appes with little tls @@ -158,28 +155,44 @@ textstartup void __enable_tls(void) { kAsanProtected); } - tib = (struct CosmoTib *)(mem + siz - sizeof(*tib)); - tls = mem + siz - sizeof(*tib) - I(_tls_size); + struct CosmoTib *tib = (struct CosmoTib *)(mem + siz - sizeof(*tib)); + char *tls = mem + siz - sizeof(*tib) - I(_tls_size); + + // copy in initialized data section + if (I(_tdata_size)) { + if (IsAsan()) { + __asan_memcpy(tls, _tdata_start, I(_tdata_size)); + } else { + memcpy(tls, _tdata_start, I(_tdata_size)); + } + } #elif defined(__aarch64__) - size_t hiz = ROUNDUP(sizeof(*tib) + 2 * sizeof(void *), I(_tls_align)); - siz = hiz + I(_tls_size); - if (siz <= sizeof(__static_tls)) { + uintptr_t size = ROUNDUP(sizeof(struct CosmoTib), I(_tls_align)) + // + ROUNDUP(sizeof(uintptr_t) * 2, I(_tdata_align)) + // + ROUNDUP(I(_tdata_size), I(_tbss_align)) + // + I(_tbss_size); + + char *mem; + if (I(_tls_align) <= TLS_ALIGNMENT && size <= sizeof(__static_tls)) { mem = __static_tls; } else { - mem = _weaken(_mapanon)(siz); + mem = _weaken(_mapanon)(size); } - if (IsAsan()) { - // there's a roundup(pagesize) gap between .tdata and .tbss - // poison that empty space - __asan_poison(mem + hiz + I(_tdata_size), I(_tbss_offset) - I(_tdata_size), - kAsanProtected); - } + struct CosmoTib *tib = + (struct CosmoTib *)(mem + + ROUNDUP(sizeof(struct CosmoTib), I(_tls_align)) - + sizeof(struct CosmoTib)); - tib = (struct CosmoTib *)mem; - tls = mem + hiz; + uintptr_t *dtv = (uintptr_t *)(tib + 1); + size_t dtv_size = sizeof(uintptr_t) * 2; + + char *tdata = (char *)dtv + ROUNDUP(dtv_size, I(_tdata_align)); + if (I(_tdata_size)) { + memmove(tdata, _tdata_start, I(_tdata_size)); + } // Set the DTV. // @@ -189,8 +202,8 @@ textstartup void __enable_tls(void) { // // @see musl/src/env/__init_tls.c // @see https://chao-tic.github.io/blog/2018/12/25/tls - ((uintptr_t *)tls)[-2] = 1; - ((void **)tls)[-1] = tls; + dtv[0] = 1; + dtv[1] = (uintptr_t)tdata; #else #error "unsupported architecture" @@ -213,6 +226,8 @@ textstartup void __enable_tls(void) { } else if (IsXnuSilicon()) { tib->tib_syshand = __syslib->__pthread_self(); } + + int tid; if (IsLinux() || IsXnuSilicon()) { // gnu/systemd guarantees pid==tid for the main thread so we can // avoid issuing a superfluous system call at startup in program @@ -237,15 +252,6 @@ textstartup void __enable_tls(void) { _pthread_list = &_pthread_static.list; atomic_store_explicit(&_pthread_static.ptid, tid, memory_order_relaxed); - // copy in initialized data section - if (I(_tdata_size)) { - if (IsAsan()) { - __asan_memcpy(tls, _tdata_start, I(_tdata_size)); - } else { - memcpy(tls, _tdata_start, I(_tdata_size)); - } - } - // ask the operating system to change the x86 segment register __set_tls(tib); diff --git a/libc/thread/mktls.c b/libc/thread/mktls.c index 93919f051..d871603e5 100644 --- a/libc/thread/mktls.c +++ b/libc/thread/mktls.c @@ -25,6 +25,7 @@ #include "libc/mem/mem.h" #include "libc/runtime/internal.h" #include "libc/runtime/runtime.h" +#include "libc/stdio/sysparam.h" #include "libc/str/locale.h" #include "libc/str/str.h" #include "libc/thread/tls.h" @@ -54,6 +55,19 @@ static char *_mktls_below(struct CosmoTib **out_tib) { char *mem, *tls; struct CosmoTib *tib; + // Here's the TLS memory layout on x86_64 + // + // __get_tls() + // │ + // %fs OpenBSD/NetBSD + // _Thread_local │ + // ┌───┬──────────┬──────────┼───┐ + // │pad│ .tdata │ .tbss │tib│ + // └───┴──────────┴──────────┼───┘ + // │ + // Linux/FreeBSD/Windows/Mac %gs + // + siz = ROUNDUP(I(_tls_size) + sizeof(*tib), _Alignof(struct CosmoTib)); siz = ROUNDUP(siz, _Alignof(struct CosmoTib)); mem = memalign(_Alignof(struct CosmoTib), siz); @@ -77,53 +91,58 @@ static char *_mktls_below(struct CosmoTib **out_tib) { } // clear .tbss - bzero(tls + I(_tbss_offset), I(_tbss_size)); + if (I(_tbss_size)) + bzero(tls + I(_tbss_offset), I(_tbss_size)); // set up thread information block return _mktls_finish(out_tib, mem, tib); } static char *_mktls_above(struct CosmoTib **out_tib) { - size_t hiz, siz; - struct CosmoTib *tib; - char *mem, *dtv, *tls; - // allocate memory for tdata, tbss, and tib - hiz = ROUNDUP(sizeof(*tib) + 2 * sizeof(void *), I(_tls_align)); - siz = hiz + I(_tls_size); - mem = memalign(TLS_ALIGNMENT, siz); + // Here's the TLS memory layout on aarch64 + // + // x28 + // %tpidr_el0 + // │ + // │ _Thread_local + // ┌───┼───┬──────────┬──────────┐ + // │tib│dtv│ .tdata │ .tbss │ + // ├───┴───┴──────────┴──────────┘ + // │ + // __get_tls() + // + + size_t size = ROUNDUP(sizeof(struct CosmoTib), I(_tls_align)) + // + ROUNDUP(sizeof(uintptr_t) * 2, I(_tdata_align)) + // + ROUNDUP(I(_tdata_size), I(_tbss_align)) + // + I(_tbss_size); + + char *mem = memalign(I(_tls_align), size); if (!mem) return 0; - // poison memory between tdata and tbss - if (IsAsan()) { - __asan_poison(mem + hiz + I(_tdata_size), I(_tbss_offset) - I(_tdata_size), - kAsanProtected); - } + struct CosmoTib *tib = + (struct CosmoTib *)(mem + + ROUNDUP(sizeof(struct CosmoTib), I(_tls_align)) - + sizeof(struct CosmoTib)); - tib = (struct CosmoTib *)mem; - dtv = mem + sizeof(*tib); - tls = mem + hiz; + uintptr_t *dtv = (uintptr_t *)(tib + 1); + size_t dtv_size = sizeof(uintptr_t) * 2; - // set dtv - ((uintptr_t *)dtv)[0] = 1; - ((void **)dtv)[1] = tls; - - // initialize .tdata + char *tdata = (char *)dtv + ROUNDUP(dtv_size, I(_tdata_align)); if (I(_tdata_size)) { - if (IsAsan()) { - __asan_memcpy(tls, _tdata_start, I(_tdata_size)); - } else { - memmove(tls, _tdata_start, I(_tdata_size)); - } + memmove(tdata, _tdata_start, I(_tdata_size)); } - // clear .tbss + char *tbss = tdata + ROUNDUP(I(_tdata_size), I(_tbss_align)); if (I(_tbss_size)) { - bzero(tls + I(_tbss_offset), I(_tbss_size)); + bzero(tbss, I(_tbss_size)); } - // set up thread information block + dtv[0] = 1; + dtv[1] = (uintptr_t)tdata; + return _mktls_finish(out_tib, mem, tib); }