mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 19:43:32 +00:00
e0c2b91b3e
It never did anything and isn't worthwhile as documentation.
116 lines
5.3 KiB
C
116 lines
5.3 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8                                :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2022 Justine Alexandra Roberts Tunney                              │
│                                                                              │
│ Permission to use, copy, modify, and/or distribute this software for         │
│ any purpose with or without fee is hereby granted, provided that the         │
│ above copyright notice and this permission notice appear in all copies.      │
│                                                                              │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL                │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED                │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE             │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL         │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR        │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER               │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR             │
│ PERFORMANCE OF THIS SOFTWARE.                                                │
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "ape/sections.internal.h"
|
|
#include "libc/intrin/bits.h"
|
|
#include "libc/runtime/internal.h"
|
|
#include "libc/runtime/runtime.h"
|
|
#include "libc/thread/tls.h"
|
|
|
|
// 16-byte vector of `char` lanes declared with 1-byte alignment, which
// lets __morph_tls() load one through a cast of a byte pointer that is
// advanced in single-byte steps.
typedef char xmm_t __attribute__((__vector_size__(16), __aligned__(1)));
|
|
|
|
privileged void __morph_tls(void) {
|
|
#ifdef __x86_64__
|
|
// We need to rewrite SysV _Thread_local code. You MUST use the
|
|
// -mno-tls-direct-seg-refs flag which generates code like this
|
|
//
|
|
// 64 48 8b 0R4 25 00 00 00 00 mov %fs:0,%R
|
|
// 64 48 03 0R4 25 00 00 00 00 add %fs:0,%R
|
|
//
|
|
// Which on Mac we can replace with this:
|
|
//
|
|
// 65 48 8b 0R4 25 30 00 00 00 mov %gs:0x30,%R
|
|
//
|
|
// Since we have no idea where the TLS instructions exist in the
|
|
// binary, we need to disassemble the whole program image. This'll
|
|
// potentially take a few milliseconds for some larger programs.
|
|
//
|
|
// We check `_tls_content` which is generated by the linker script
|
|
// since it lets us determine ahead of time if _Thread_local vars
|
|
// have actually been linked into this program.
|
|
int n;
|
|
uint64_t w;
|
|
unsigned m, dis;
|
|
unsigned char *p;
|
|
__morph_begin();
|
|
|
|
if (IsXnu()) {
|
|
// Apple is quite straightforward to patch. We basically
|
|
// just change the segment register, and the linear slot
|
|
// address 0x30 was promised to us, according to Go team
|
|
// https://github.com/golang/go/issues/23617
|
|
dis = 0x30;
|
|
} else {
|
|
// MSVC __declspec(thread) generates binary code for this
|
|
// %gs:0x1480 abi. So long as TlsAlloc() isn't called >64
|
|
// times we should be good.
|
|
dis = 0x1480 + __tls_index * 8;
|
|
}
|
|
|
|
// iterate over modifiable code looking for 9 byte instruction
|
|
// this would take 30 ms using xed to enable tls on python.com
|
|
for (p = _ereal; p + 9 <= __privileged_start; p += n) {
|
|
|
|
// use sse to zoom zoom to fs register prefixes
|
|
// that way it'll take 1 ms to morph python.com
|
|
while (p + 9 + 16 <= __privileged_start) {
|
|
if ((m = __builtin_ia32_pmovmskb128(
|
|
*(xmm_t *)p == (xmm_t){0144, 0144, 0144, 0144, 0144, 0144, 0144,
|
|
0144, 0144, 0144, 0144, 0144, 0144, 0144,
|
|
0144, 0144}))) {
|
|
m = __builtin_ctzll(m);
|
|
p += m;
|
|
break;
|
|
} else {
|
|
p += 16;
|
|
}
|
|
}
|
|
|
|
// we're checking for the following expression:
|
|
// 0144 == p[0] && // %fs
|
|
// 0110 == (p[1] & 0373) && // rex.w (and ignore rex.r)
|
|
// (0213 == p[2] || // mov reg/mem → reg (word-sized)
|
|
// 0003 == p[2]) && // add reg/mem → reg (word-sized)
|
|
// 0004 == (p[3] & 0307) && // mod/rm (4,reg,0) means sib → reg
|
|
// 0045 == p[4] && // sib (5,4,0) → (rbp,rsp,0) → disp32
|
|
// 0000 == p[5] && // displacement (von Neumann endian)
|
|
// 0000 == p[6] && // displacement
|
|
// 0000 == p[7] && // displacement
|
|
// 0000 == p[8] // displacement
|
|
w = READ64LE(p) & READ64LE("\377\373\377\307\377\377\377\377");
|
|
if ((w == READ64LE("\144\110\213\004\045\000\000\000") ||
|
|
w == READ64LE("\144\110\003\004\045\000\000\000")) &&
|
|
!p[8]) {
|
|
|
|
// now change the code
|
|
p[0] = 0145; // change %fs to %gs
|
|
p[5] = (dis & 0x000000ff) >> 000; // displacement
|
|
p[6] = (dis & 0x0000ff00) >> 010; // displacement
|
|
p[7] = (dis & 0x00ff0000) >> 020; // displacement
|
|
p[8] = (dis & 0xff000000) >> 030; // displacement
|
|
|
|
// advance to the next instruction
|
|
n = 9;
|
|
} else {
|
|
n = 1;
|
|
}
|
|
}
|
|
|
|
__morph_end();
|
|
#endif
|
|
}
|