Fix thread-local storage bugs on aarch64

This change fixes an issue where .tbss memory might not be initialized.
This commit is contained in:
Justine Tunney 2024-05-08 04:03:51 -07:00
parent 793393a341
commit ae2a7ac844
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
6 changed files with 93 additions and 65 deletions

View file

@@ -293,7 +293,6 @@ _tdata_size = _tdata_end - _tdata_start;
_tbss_size = _tbss_end - _tbss_start; _tbss_size = _tbss_end - _tbss_start;
_tbss_offset = _tbss_start - _tdata_start; _tbss_offset = _tbss_start - _tdata_start;
_tls_content = (_tdata_end - _tdata_start) + (_tbss_end - _tbss_start); _tls_content = (_tdata_end - _tdata_start) + (_tbss_end - _tbss_start);
_tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)); _tdata_align = ALIGNOF(.tdata);
_tbss_align = ALIGNOF(.tbss);
ASSERT(ALIGNOF(.tdata) <= TLS_ALIGNMENT && ALIGNOF(.tbss) <= TLS_ALIGNMENT, _tls_align = MAX(TLS_ALIGNMENT, MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)));
"_Thread_local _Alignof can't exceed TLS_ALIGNMENT");

View file

@@ -553,7 +553,9 @@ _tdata_size = _tdata_end - _tdata_start;
_tbss_size = _tbss_end - _tbss_start; _tbss_size = _tbss_end - _tbss_start;
_tbss_offset = _tbss_start - _tdata_start; _tbss_offset = _tbss_start - _tdata_start;
_tls_content = (_tdata_end - _tdata_start) + (_tbss_end - _tbss_start); _tls_content = (_tdata_end - _tdata_start) + (_tbss_end - _tbss_start);
_tls_align = 1; _tdata_align = ALIGNOF(.tdata);
_tbss_align = ALIGNOF(.tbss);
_tls_align = MAX(TLS_ALIGNMENT, MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)));
ape_cod_offset = 0; ape_cod_offset = 0;
ape_cod_vaddr = ADDR(.head); ape_cod_vaddr = ADDR(.head);

View file

@@ -16,6 +16,8 @@ extern unsigned char _tdata_end[] __attribute__((__weak__));
extern unsigned char _tbss_start[] __attribute__((__weak__)); extern unsigned char _tbss_start[] __attribute__((__weak__));
extern unsigned char _tbss_end[] __attribute__((__weak__)); extern unsigned char _tbss_end[] __attribute__((__weak__));
extern unsigned char _tls_align[] __attribute__((__weak__)); extern unsigned char _tls_align[] __attribute__((__weak__));
extern unsigned char _tdata_align[] __attribute__((__weak__));
extern unsigned char _tbss_align[] __attribute__((__weak__));
extern unsigned char __test_start[] __attribute__((__weak__)); extern unsigned char __test_start[] __attribute__((__weak__));
extern unsigned char __ro[] __attribute__((__weak__)); extern unsigned char __ro[] __attribute__((__weak__));
extern unsigned char __data_start[] __attribute__((__weak__)); extern unsigned char __data_start[] __attribute__((__weak__));

View file

@@ -255,7 +255,7 @@ static int __sigaction(int sig, const struct sigaction *act,
// xnu silicon claims to support sa_resethand but it does nothing // xnu silicon claims to support sa_resethand but it does nothing
// this can be tested, since it clears the bit from flags as well // this can be tested, since it clears the bit from flags as well
if (!rc && oldact && if (!rc && oldact &&
(((struct sigaction_silicon *)ap)->sa_flags & SA_RESETHAND)) { (((struct sigaction_silicon *)oldact)->sa_flags & SA_RESETHAND)) {
((struct sigaction_silicon *)oldact)->sa_flags |= SA_RESETHAND; ((struct sigaction_silicon *)oldact)->sa_flags |= SA_RESETHAND;
} }
} }

View file

@@ -112,10 +112,6 @@ static unsigned long ParseMask(const char *str) {
* and your `errno` variable also won't be thread safe anymore. * and your `errno` variable also won't be thread safe anymore.
*/ */
textstartup void __enable_tls(void) { textstartup void __enable_tls(void) {
int tid;
size_t siz;
char *mem, *tls;
struct CosmoTib *tib;
// Here's the layout we're currently using: // Here's the layout we're currently using:
// //
@@ -138,7 +134,8 @@ textstartup void __enable_tls(void) {
#ifdef __x86_64__ #ifdef __x86_64__
siz = ROUNDUP(I(_tls_size) + sizeof(*tib), TLS_ALIGNMENT); char *mem;
size_t siz = ROUNDUP(I(_tls_size) + sizeof(struct CosmoTib), TLS_ALIGNMENT);
if (siz <= sizeof(__static_tls)) { if (siz <= sizeof(__static_tls)) {
// if tls requirement is small then use the static tls block // if tls requirement is small then use the static tls block
// which helps avoid a system call for appes with little tls // which helps avoid a system call for appes with little tls
@@ -158,28 +155,44 @@ textstartup void __enable_tls(void) {
kAsanProtected); kAsanProtected);
} }
tib = (struct CosmoTib *)(mem + siz - sizeof(*tib)); struct CosmoTib *tib = (struct CosmoTib *)(mem + siz - sizeof(*tib));
tls = mem + siz - sizeof(*tib) - I(_tls_size); char *tls = mem + siz - sizeof(*tib) - I(_tls_size);
// copy in initialized data section
if (I(_tdata_size)) {
if (IsAsan()) {
__asan_memcpy(tls, _tdata_start, I(_tdata_size));
} else {
memcpy(tls, _tdata_start, I(_tdata_size));
}
}
#elif defined(__aarch64__) #elif defined(__aarch64__)
size_t hiz = ROUNDUP(sizeof(*tib) + 2 * sizeof(void *), I(_tls_align)); uintptr_t size = ROUNDUP(sizeof(struct CosmoTib), I(_tls_align)) + //
siz = hiz + I(_tls_size); ROUNDUP(sizeof(uintptr_t) * 2, I(_tdata_align)) + //
if (siz <= sizeof(__static_tls)) { ROUNDUP(I(_tdata_size), I(_tbss_align)) + //
I(_tbss_size);
char *mem;
if (I(_tls_align) <= TLS_ALIGNMENT && size <= sizeof(__static_tls)) {
mem = __static_tls; mem = __static_tls;
} else { } else {
mem = _weaken(_mapanon)(siz); mem = _weaken(_mapanon)(size);
} }
if (IsAsan()) { struct CosmoTib *tib =
// there's a roundup(pagesize) gap between .tdata and .tbss (struct CosmoTib *)(mem +
// poison that empty space ROUNDUP(sizeof(struct CosmoTib), I(_tls_align)) -
__asan_poison(mem + hiz + I(_tdata_size), I(_tbss_offset) - I(_tdata_size), sizeof(struct CosmoTib));
kAsanProtected);
}
tib = (struct CosmoTib *)mem; uintptr_t *dtv = (uintptr_t *)(tib + 1);
tls = mem + hiz; size_t dtv_size = sizeof(uintptr_t) * 2;
char *tdata = (char *)dtv + ROUNDUP(dtv_size, I(_tdata_align));
if (I(_tdata_size)) {
memmove(tdata, _tdata_start, I(_tdata_size));
}
// Set the DTV. // Set the DTV.
// //
@@ -189,8 +202,8 @@ textstartup void __enable_tls(void) {
// //
// @see musl/src/env/__init_tls.c // @see musl/src/env/__init_tls.c
// @see https://chao-tic.github.io/blog/2018/12/25/tls // @see https://chao-tic.github.io/blog/2018/12/25/tls
((uintptr_t *)tls)[-2] = 1; dtv[0] = 1;
((void **)tls)[-1] = tls; dtv[1] = (uintptr_t)tdata;
#else #else
#error "unsupported architecture" #error "unsupported architecture"
@@ -213,6 +226,8 @@ textstartup void __enable_tls(void) {
} else if (IsXnuSilicon()) { } else if (IsXnuSilicon()) {
tib->tib_syshand = __syslib->__pthread_self(); tib->tib_syshand = __syslib->__pthread_self();
} }
int tid;
if (IsLinux() || IsXnuSilicon()) { if (IsLinux() || IsXnuSilicon()) {
// gnu/systemd guarantees pid==tid for the main thread so we can // gnu/systemd guarantees pid==tid for the main thread so we can
// avoid issuing a superfluous system call at startup in program // avoid issuing a superfluous system call at startup in program
@@ -237,15 +252,6 @@ textstartup void __enable_tls(void) {
_pthread_list = &_pthread_static.list; _pthread_list = &_pthread_static.list;
atomic_store_explicit(&_pthread_static.ptid, tid, memory_order_relaxed); atomic_store_explicit(&_pthread_static.ptid, tid, memory_order_relaxed);
// copy in initialized data section
if (I(_tdata_size)) {
if (IsAsan()) {
__asan_memcpy(tls, _tdata_start, I(_tdata_size));
} else {
memcpy(tls, _tdata_start, I(_tdata_size));
}
}
// ask the operating system to change the x86 segment register // ask the operating system to change the x86 segment register
__set_tls(tib); __set_tls(tib);

View file

@@ -25,6 +25,7 @@
#include "libc/mem/mem.h" #include "libc/mem/mem.h"
#include "libc/runtime/internal.h" #include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/stdio/sysparam.h"
#include "libc/str/locale.h" #include "libc/str/locale.h"
#include "libc/str/str.h" #include "libc/str/str.h"
#include "libc/thread/tls.h" #include "libc/thread/tls.h"
@@ -54,6 +55,19 @@ static char *_mktls_below(struct CosmoTib **out_tib) {
char *mem, *tls; char *mem, *tls;
struct CosmoTib *tib; struct CosmoTib *tib;
// Here's the TLS memory layout on x86_64
//
// __get_tls()
// │
// %fs OpenBSD/NetBSD
// _Thread_local │
// ┌───┬──────────┬──────────┼───┐
// │pad│ .tdata │ .tbss │tib│
// └───┴──────────┴──────────┼───┘
// │
// Linux/FreeBSD/Windows/Mac %gs
//
siz = ROUNDUP(I(_tls_size) + sizeof(*tib), _Alignof(struct CosmoTib)); siz = ROUNDUP(I(_tls_size) + sizeof(*tib), _Alignof(struct CosmoTib));
siz = ROUNDUP(siz, _Alignof(struct CosmoTib)); siz = ROUNDUP(siz, _Alignof(struct CosmoTib));
mem = memalign(_Alignof(struct CosmoTib), siz); mem = memalign(_Alignof(struct CosmoTib), siz);
@@ -77,53 +91,58 @@ static char *_mktls_below(struct CosmoTib **out_tib) {
} }
// clear .tbss // clear .tbss
bzero(tls + I(_tbss_offset), I(_tbss_size)); if (I(_tbss_size))
bzero(tls + I(_tbss_offset), I(_tbss_size));
// set up thread information block // set up thread information block
return _mktls_finish(out_tib, mem, tib); return _mktls_finish(out_tib, mem, tib);
} }
static char *_mktls_above(struct CosmoTib **out_tib) { static char *_mktls_above(struct CosmoTib **out_tib) {
size_t hiz, siz;
struct CosmoTib *tib;
char *mem, *dtv, *tls;
// allocate memory for tdata, tbss, and tib // Here's the TLS memory layout on aarch64
hiz = ROUNDUP(sizeof(*tib) + 2 * sizeof(void *), I(_tls_align)); //
siz = hiz + I(_tls_size); // x28
mem = memalign(TLS_ALIGNMENT, siz); // %tpidr_el0
// │
// │ _Thread_local
// ┌───┼───┬──────────┬──────────┐
// │tib│dtv│ .tdata │ .tbss │
// ├───┴───┴──────────┴──────────┘
// │
// __get_tls()
//
size_t size = ROUNDUP(sizeof(struct CosmoTib), I(_tls_align)) + //
ROUNDUP(sizeof(uintptr_t) * 2, I(_tdata_align)) + //
ROUNDUP(I(_tdata_size), I(_tbss_align)) + //
I(_tbss_size);
char *mem = memalign(I(_tls_align), size);
if (!mem) if (!mem)
return 0; return 0;
// poison memory between tdata and tbss struct CosmoTib *tib =
if (IsAsan()) { (struct CosmoTib *)(mem +
__asan_poison(mem + hiz + I(_tdata_size), I(_tbss_offset) - I(_tdata_size), ROUNDUP(sizeof(struct CosmoTib), I(_tls_align)) -
kAsanProtected); sizeof(struct CosmoTib));
}
tib = (struct CosmoTib *)mem; uintptr_t *dtv = (uintptr_t *)(tib + 1);
dtv = mem + sizeof(*tib); size_t dtv_size = sizeof(uintptr_t) * 2;
tls = mem + hiz;
// set dtv char *tdata = (char *)dtv + ROUNDUP(dtv_size, I(_tdata_align));
((uintptr_t *)dtv)[0] = 1;
((void **)dtv)[1] = tls;
// initialize .tdata
if (I(_tdata_size)) { if (I(_tdata_size)) {
if (IsAsan()) { memmove(tdata, _tdata_start, I(_tdata_size));
__asan_memcpy(tls, _tdata_start, I(_tdata_size));
} else {
memmove(tls, _tdata_start, I(_tdata_size));
}
} }
// clear .tbss char *tbss = tdata + ROUNDUP(I(_tdata_size), I(_tbss_align));
if (I(_tbss_size)) { if (I(_tbss_size)) {
bzero(tls + I(_tbss_offset), I(_tbss_size)); bzero(tbss, I(_tbss_size));
} }
// set up thread information block dtv[0] = 1;
dtv[1] = (uintptr_t)tdata;
return _mktls_finish(out_tib, mem, tib); return _mktls_finish(out_tib, mem, tib);
} }