/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2022 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for         │
│ any purpose with or without fee is hereby granted, provided that the         │
│ above copyright notice and this permission notice appear in all copies.      │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
│ PERFORMANCE OF THIS SOFTWARE.                                                │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "ape/sections.internal.h"
#include "libc/dce.h"
#include "libc/intrin/bits.h"
#include "libc/intrin/kprintf.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/morph.h"
#include "libc/thread/tls.h"

// sixteen chars with an alignment of one, so the vector can be loaded
// from any byte offset within the program image
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));

/**
 * Rewrites %fs-relative thread-local accesses in the program image.
 *
 * SysV _Thread_local code built with -mno-tls-direct-seg-refs contains
 * nine-byte instructions of the form
 *
 *     64 48 8b 0R4 25 00 00 00 00   mov %fs:0,%R
 *     64 48 03 0R4 25 00 00 00 00   add %fs:0,%R
 *
 * On Windows and XNU each such instruction found between `_ereal` and
 * `__privileged_start` is patched in place so that it goes through %gs
 * with a platform-specific displacement instead, e.g. on Mac:
 *
 *     65 48 8b 0R4 25 30 00 00 00   mov %gs:0x30,%R
 *
 * The locations of these instructions aren't known ahead of time, so
 * that whole range is scanned byte by byte. On other operating systems,
 * and on builds that aren't x86-64, this function does nothing.
 *
 * NOTE(review): earlier commentary said `_tls_content` (generated by
 * the linker script) is checked to tell whether _Thread_local variables
 * were linked; no such check happens in this function, so presumably
 * the caller is responsible for it -- confirm.
 */
privileged void __morph_tls(void) {
#ifdef __x86_64__
  if (!IsWindows() && !IsXnu()) return;

  sigset_t mask;
  __morph_begin(&mask);

  // XNU: only the segment register and slot need to change; the linear
  // slot address 0x30 was promised to us, according to the Go team, see
  // https://github.com/golang/go/issues/23617
  //
  // Windows: MSVC __declspec(thread) generates code for the %gs:0x1480
  // abi, whose slots are eight bytes apart; that should hold so long as
  // TlsAlloc() isn't called more than 64 times
  unsigned disp = IsXnu() ? 0x30 : 0x1480 + __tls_index * 8;

  // sixteen copies of the %fs segment override prefix (0x64)
  const xmm_t kFsPrefix = {0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64,
                           0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64, 0x64};

  // walk the modifiable code looking for the nine byte instruction; the
  // original author measured ~30 ms for python.com when doing this with
  // xed, versus ~1 ms with the vectorized skip below
  unsigned char *pc = _ereal;
  while (pc + 9 <= __privileged_start) {

    // fast forward to the next 0x64 byte, sixteen bytes at a time, for
    // as long as a whole vector plus one instruction still fits
    while (pc + 9 + 16 <= __privileged_start) {
      unsigned hits = __builtin_ia32_pmovmskb128(*(xmm_t *)pc == kFsPrefix);
      if (hits) {
        pc += __builtin_ctzll(hits);  // index of first matching byte
        break;
      }
      pc += 16;
    }

    // the first eight bytes are masked and compared as one word, which
    // is the same as testing:
    //
    //     0144 == pc[0] &&            // %fs
    //     0110 == (pc[1] & 0373) &&   // rex.w (and ignore rex.r)
    //     (0213 == pc[2] ||           // mov reg/mem → reg (word-sized)
    //      0003 == pc[2]) &&          // add reg/mem → reg (word-sized)
    //     0004 == (pc[3] & 0307) &&   // mod/rm (4,reg,0) means sib → reg
    //     0045 == pc[4] &&            // sib (5,4,0) → (rbp,rsp,0) → disp32
    //     0000 == pc[5] &&            // displacement (little endian)
    //     0000 == pc[6] &&            // displacement
    //     0000 == pc[7]               // displacement
    //
    // and the ninth byte (last byte of displacement) is tested apart
    uint64_t insn = READ64LE(pc) & READ64LE("\377\373\377\307\377\377\377\377");
    int is_mov = insn == READ64LE("\144\110\213\004\045\000\000\000");
    int is_add = insn == READ64LE("\144\110\003\004\045\000\000\000");
    if ((!is_mov && !is_add) || pc[8]) {
      pc += 1;  // not our instruction; retry at the next byte
      continue;
    }

    // patch the instruction in place
    pc[0] = 0x65;                         // %fs becomes %gs
    pc[5] = (unsigned char)(disp >> 0);   // displacement, low byte first
    pc[6] = (unsigned char)(disp >> 8);   // displacement
    pc[7] = (unsigned char)(disp >> 16);  // displacement
    pc[8] = (unsigned char)(disp >> 24);  // displacement

    // step over the instruction that was just rewritten
    pc += 9;
  }

  __morph_end(&mask);
#endif
}