Remove some legacy cruft

Function trace logs will now report stack usage accurately: the reported
figure no longer includes the argv/environ block. Our clone() polyfill is
now simpler and uses less stack memory. Function call tracing on x86 is
now faster too.
This commit is contained in:
Justine Tunney 2025-01-02 18:44:07 -08:00
parent 8db646f6b2
commit a15958edc6
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
21 changed files with 291 additions and 467 deletions

View file

@ -254,7 +254,7 @@ static bool elf_slurp(struct Loaded *l, int fd, const char *file) {
return true; return true;
} }
static dontinline bool elf_load(struct Loaded *l, const char *file, long pagesz, dontinline static bool elf_load(struct Loaded *l, const char *file, long pagesz,
char *interp_path, size_t interp_size) { char *interp_path, size_t interp_size) {
int fd; int fd;
if ((fd = open(file, O_RDONLY | O_CLOEXEC)) == -1) if ((fd = open(file, O_RDONLY | O_CLOEXEC)) == -1)
@ -280,7 +280,7 @@ static long *push_strs(long *sp, char **list, int count) {
return sp; return sp;
} }
static wontreturn dontinstrument void foreign_helper(void **p) { wontreturn dontinstrument static void foreign_helper(void **p) {
__foreign.dlopen = p[0]; __foreign.dlopen = p[0];
__foreign.dlsym = p[1]; __foreign.dlsym = p[1];
__foreign.dlclose = p[2]; __foreign.dlclose = p[2];
@ -288,7 +288,7 @@ static wontreturn dontinstrument void foreign_helper(void **p) {
_longjmp(__foreign.jb, 1); _longjmp(__foreign.jb, 1);
} }
static dontinline void elf_exec(const char *file, char **envp) { dontinline static void elf_exec(const char *file, char **envp) {
// get microprocessor page size // get microprocessor page size
long pagesz = __pagesize; long pagesz = __pagesize;
@ -412,7 +412,7 @@ static char *dlerror_set(const char *str) {
return dlerror_buf; return dlerror_buf;
} }
static dontinline char *foreign_alloc_block(void) { dontinline static char *foreign_alloc_block(void) {
char *p = 0; char *p = 0;
size_t sz = 65536; size_t sz = 65536;
if (!IsWindows()) { if (!IsWindows()) {
@ -435,7 +435,7 @@ static dontinline char *foreign_alloc_block(void) {
return p; return p;
} }
static dontinline void *foreign_alloc(size_t n) { dontinline static void *foreign_alloc(size_t n) {
void *res; void *res;
static char *block; static char *block;
__dlopen_lock(); __dlopen_lock();
@ -548,7 +548,7 @@ static void *foreign_thunk_nt(void *func) {
return code; return code;
} }
static dontinline bool foreign_compile(char exe[hasatleast PATH_MAX]) { dontinline static bool foreign_compile(char exe[hasatleast PATH_MAX]) {
// construct path // construct path
strlcpy(exe, get_tmp_dir(), PATH_MAX); strlcpy(exe, get_tmp_dir(), PATH_MAX);

View file

@ -49,6 +49,6 @@
int __vcscanf(int (*)(void *), int (*)(int, void *), void *, const char *, int __vcscanf(int (*)(void *), int (*)(int, void *), void *, const char *,
va_list); va_list);
int __fmt(void *, void *, const char *, va_list, int *); int __fmt(void *, void *, const char *, va_list, int *);
__msabi char16_t *__itoa16(char16_t[21], uint64_t); char16_t *__itoa16(char16_t[21], uint64_t) __msabi;
#endif /* COSMOPOLITAN_LIBC_FMT_STRTOL_H_ */ #endif /* COSMOPOLITAN_LIBC_FMT_STRTOL_H_ */

View file

@ -89,14 +89,14 @@ __msabi extern typeof(WriteFile) *const __imp_WriteFile;
extern pthread_mutex_t __sig_worker_lock; extern pthread_mutex_t __sig_worker_lock;
HAIRY static bool __sig_ignored_by_default(int sig) { textwindows static bool __sig_ignored_by_default(int sig) {
return sig == SIGURG || // return sig == SIGURG || //
sig == SIGCONT || // sig == SIGCONT || //
sig == SIGCHLD || // sig == SIGCHLD || //
sig == SIGWINCH; sig == SIGWINCH;
} }
HAIRY bool __sig_ignored(int sig) { textwindows bool __sig_ignored(int sig) {
return __sighandrvas[sig] == (intptr_t)SIG_IGN || return __sighandrvas[sig] == (intptr_t)SIG_IGN ||
(__sighandrvas[sig] == (intptr_t)SIG_DFL && (__sighandrvas[sig] == (intptr_t)SIG_DFL &&
__sig_ignored_by_default(sig)); __sig_ignored_by_default(sig));
@ -532,14 +532,14 @@ textwindows void __sig_generate(int sig, int sic) {
} }
} }
HAIRY static char *__sig_stpcpy(char *d, const char *s) { textwindows static char *__sig_stpcpy(char *d, const char *s) {
size_t i; size_t i;
for (i = 0;; ++i) for (i = 0;; ++i)
if (!(d[i] = s[i])) if (!(d[i] = s[i]))
return d + i; return d + i;
} }
HAIRY wontreturn static void __sig_death(int sig, const char *thing) { textwindows wontreturn static void __sig_death(int sig, const char *thing) {
#ifndef TINY #ifndef TINY
intptr_t hStderr; intptr_t hStderr;
char sigbuf[21], s[128], *p; char sigbuf[21], s[128], *p;
@ -810,7 +810,7 @@ HAIRY static uint32_t __sig_worker(void *arg) {
_pthread_mutex_unlock(&__sig_worker_lock); _pthread_mutex_unlock(&__sig_worker_lock);
Sleep(POLL_INTERVAL_MS); Sleep(POLL_INTERVAL_MS);
} }
return 0; __builtin_unreachable();
} }
__attribute__((__constructor__(10))) textstartup void __sig_init(void) { __attribute__((__constructor__(10))) textstartup void __sig_init(void) {

View file

@ -34,6 +34,8 @@
#include "libc/nt/thunk/msabi.h" #include "libc/nt/thunk/msabi.h"
#ifdef __x86_64__ #ifdef __x86_64__
#define ABI __msabi textwindows dontinstrument
// cut back on code size and avoid setting errno // cut back on code size and avoid setting errno
// this code is a mandatory dependency of winmain // this code is a mandatory dependency of winmain
__msabi extern typeof(CloseHandle) *const __imp_CloseHandle; __msabi extern typeof(CloseHandle) *const __imp_CloseHandle;
@ -47,8 +49,8 @@ __msabi extern typeof(GetEnvironmentVariable)
*const __imp_GetEnvironmentVariableW; *const __imp_GetEnvironmentVariableW;
// Generates C:\ProgramData\cosmo\sig\x\y.pid like path // Generates C:\ProgramData\cosmo\sig\x\y.pid like path
__msabi textwindows dontinstrument char16_t *__sig_process_path( ABI char16_t *__sig_process_path(char16_t *path, uint32_t pid,
char16_t *path, uint32_t pid, int create_directories) { int create_directories) {
char16_t buf[3]; char16_t buf[3];
char16_t *p = path; char16_t *p = path;
uint32_t vlen = __imp_GetEnvironmentVariableW(u"SYSTEMDRIVE", buf, 3); uint32_t vlen = __imp_GetEnvironmentVariableW(u"SYSTEMDRIVE", buf, 3);
@ -100,7 +102,7 @@ __msabi textwindows dontinstrument char16_t *__sig_process_path(
return path; return path;
} }
__msabi textwindows atomic_ulong *__sig_map_process(int pid, int disposition) { ABI atomic_ulong *__sig_map_process(int pid, int disposition) {
char16_t path[128]; char16_t path[128];
__sig_process_path(path, pid, disposition == kNtOpenAlways); __sig_process_path(path, pid, disposition == kNtOpenAlways);
intptr_t hand = __imp_CreateFileW(path, kNtGenericRead | kNtGenericWrite, intptr_t hand = __imp_CreateFileW(path, kNtGenericRead | kNtGenericWrite,

View file

@ -32,8 +32,7 @@ int sys_sigprocmask(int how, const sigset_t *opt_set,
how, opt_set ? (sigset_t *)(intptr_t)(uint32_t)*opt_set : 0, 0, 0); how, opt_set ? (sigset_t *)(intptr_t)(uint32_t)*opt_set : 0, 0, 0);
rc = 0; rc = 0;
} }
if (rc != -1 && opt_out_oldset) { if (rc != -1 && opt_out_oldset)
*opt_out_oldset = old[0]; *opt_out_oldset = old[0];
}
return rc; return rc;
} }

View file

@ -36,12 +36,10 @@
privileged const char *strsignal_r(int sig, char buf[21]) { privileged const char *strsignal_r(int sig, char buf[21]) {
char *p; char *p;
const char *s; const char *s;
if (!sig) { if (!sig)
return "0"; return "0";
} if ((s = GetMagnumStr(kSignalNames, sig)))
if ((s = GetMagnumStr(kSignalNames, sig))) {
return s; return s;
}
if (SIGRTMIN <= sig && sig <= SIGRTMAX) { if (SIGRTMIN <= sig && sig <= SIGRTMAX) {
sig -= SIGRTMIN; sig -= SIGRTMIN;
buf[0] = 'S'; buf[0] = 'S';

View file

@ -79,7 +79,11 @@ int ulock_wait(uint32_t operation, void *addr, uint64_t value,
// it could also mean another thread calling ulock on this address was // it could also mean another thread calling ulock on this address was
// configured (via operation) in an inconsistent way. // configured (via operation) in an inconsistent way.
// //
int ulock_wake(uint32_t operation, void *addr, uint64_t wake_value) { // should be dontinstrument because SiliconThreadMain() calls this from
// a stack managed by apple libc.
//
dontinstrument int ulock_wake(uint32_t operation, void *addr,
uint64_t wake_value) {
int rc; int rc;
rc = __syscall3i(operation, (long)addr, wake_value, 0x2000000 | 516); rc = __syscall3i(operation, (long)addr, wake_value, 0x2000000 | 516);
LOCKTRACE("ulock_wake(%#x, %p, %lx) → %s", operation, addr, wake_value, LOCKTRACE("ulock_wake(%#x, %p, %lx) → %s", operation, addr, wake_value,

View file

@ -48,9 +48,8 @@
* @param st is open symbol table for current executable * @param st is open symbol table for current executable
* @return -1 w/ errno if error happened * @return -1 w/ errno if error happened
*/ */
dontinstrument int PrintBacktraceUsingSymbols(int fd, int PrintBacktraceUsingSymbols(int fd, const struct StackFrame *bp,
const struct StackFrame *bp, struct SymbolTable *st) {
struct SymbolTable *st) {
size_t gi; size_t gi;
char *cxxbuf; char *cxxbuf;
intptr_t addr; intptr_t addr;

View file

@ -33,14 +33,14 @@ static char __watch_last[4096];
void __watch_hook(void); void __watch_hook(void);
static dontinstrument inline void Copy(char *p, char *q, size_t n) { dontinstrument static inline void Copy(char *p, char *q, size_t n) {
size_t i; size_t i;
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
p[i] = q[i]; p[i] = q[i];
} }
} }
static dontinstrument inline int Cmp(char *p, char *q, size_t n) { dontinstrument static inline int Cmp(char *p, char *q, size_t n) {
if (n == 8) if (n == 8)
return READ64LE(p) != READ64LE(q); return READ64LE(p) != READ64LE(q);
if (n == 4) if (n == 4)

View file

@ -291,21 +291,6 @@
.balign 4 .balign 4
.endm .endm
// Loads address of errno into %rcx
.macro .errno
call __errno_location
.endm
// Post-Initialization Read-Only (PIRO) BSS section.
// @param ss is an optional string, for control image locality
.macro .piro ss
.ifnb \ss
.section .piro.sort.bss.\ss,"aw",@nobits
.else
.section .piro.bss,"aw",@nobits
.endif
.endm
// Helpers for Cosmopolitan _init() amalgamation magic. // Helpers for Cosmopolitan _init() amalgamation magic.
// @param name should be consistent across macros for a module // @param name should be consistent across macros for a module
// @see libc/runtime/_init.S // @see libc/runtime/_init.S

View file

@ -71,7 +71,7 @@ struct Procs __proc = {
.lock = PTHREAD_MUTEX_INITIALIZER, .lock = PTHREAD_MUTEX_INITIALIZER,
}; };
static textwindows void __proc_stats(int64_t h, struct rusage *ru) { textwindows static void __proc_stats(int64_t h, struct rusage *ru) {
bzero(ru, sizeof(*ru)); bzero(ru, sizeof(*ru));
struct NtProcessMemoryCountersEx memcount = {sizeof(memcount)}; struct NtProcessMemoryCountersEx memcount = {sizeof(memcount)};
GetProcessMemoryInfo(h, &memcount, sizeof(memcount)); GetProcessMemoryInfo(h, &memcount, sizeof(memcount));
@ -137,7 +137,7 @@ textwindows int __proc_harvest(struct Proc *pr, bool iswait4) {
return sic; return sic;
} }
static textwindows dontinstrument uint32_t __proc_worker(void *arg) { textwindows dontinstrument static uint32_t __proc_worker(void *arg) {
struct CosmoTib tls; struct CosmoTib tls;
char *sp = __builtin_frame_address(0); char *sp = __builtin_frame_address(0);
__bootstrap_tls(&tls, __builtin_frame_address(0)); __bootstrap_tls(&tls, __builtin_frame_address(0));
@ -246,7 +246,7 @@ static textwindows dontinstrument uint32_t __proc_worker(void *arg) {
/** /**
* Lazy initializes process tracker data structures and worker. * Lazy initializes process tracker data structures and worker.
*/ */
static textwindows void __proc_setup(void) { textwindows static void __proc_setup(void) {
__proc.onbirth = CreateEvent(0, 0, 0, 0); // auto reset __proc.onbirth = CreateEvent(0, 0, 0, 0); // auto reset
__proc.haszombies = CreateEvent(0, 1, 0, 0); // manual reset __proc.haszombies = CreateEvent(0, 1, 0, 0); // manual reset
__proc.thread = CreateThread(0, STACK_SIZE, __proc_worker, 0, __proc.thread = CreateThread(0, STACK_SIZE, __proc_worker, 0,

View file

@ -26,7 +26,7 @@
// @param rdx x2 is ptid // @param rdx x2 is ptid
// @param rcx x3 is ctid // @param rcx x3 is ctid
// @param r8 x4 is tls // @param r8 x4 is tls
// @param r9 x5 is func(void*,int)→int // @param r9 x5 is func(void*)→int
// @param 8(rsp) x6 is arg // @param 8(rsp) x6 is arg
// @return tid of child on success, or -errno on error // @return tid of child on success, or -errno on error
sys_clone_linux: sys_clone_linux:
@ -45,16 +45,10 @@ sys_clone_linux:
ret ret
2: xor %ebp,%ebp // child thread 2: xor %ebp,%ebp // child thread
mov %rbx,%rdi // arg mov %rbx,%rdi // arg
mov %r10,%r15 // experiment
mov (%r10),%esi // tid
call *%r9 // func(arg,tid) call *%r9 // func(arg,tid)
xchg %eax,%edi // func(arg,tid) exitcode xchg %eax,%edi // func(arg,tid) exitcode
mov (%r15),%eax // experiment
test %eax,%eax // experiment
jz 1f // experiment
mov $60,%eax // __NR_exit(exitcode) mov $60,%eax // __NR_exit(exitcode)
syscall syscall
1: hlt // ctid was corrupted by program!
#elif defined(__aarch64__) #elif defined(__aarch64__)
stp x29,x30,[sp,#-16]! stp x29,x30,[sp,#-16]!
mov x29,sp mov x29,sp
@ -69,7 +63,6 @@ sys_clone_linux:
2: mov x29,#0 // wipe backtrace 2: mov x29,#0 // wipe backtrace
mov x28,x3 // set cosmo tls mov x28,x3 // set cosmo tls
mov x0,x6 // child thread mov x0,x6 // child thread
ldr w1,[x4] // arg2 = *ctid
blr x5 blr x5
mov x8,#93 // __NR_exit mov x8,#93 // __NR_exit
svc #0 svc #0

View file

@ -16,50 +16,27 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE. PERFORMANCE OF THIS SOFTWARE.
*/ */
#include "libc/sysv/consts/clone.h"
#include "libc/assert.h"
#include "libc/atomic.h" #include "libc/atomic.h"
#include "libc/calls/calls.h"
#include "libc/calls/state.internal.h" #include "libc/calls/state.internal.h"
#include "libc/calls/struct/sigset.h"
#include "libc/calls/struct/ucontext-netbsd.internal.h" #include "libc/calls/struct/ucontext-netbsd.internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/calls/wincrash.internal.h"
#include "libc/dce.h" #include "libc/dce.h"
#include "libc/errno.h" #include "libc/intrin/asmflag.h"
#include "libc/intrin/atomic.h" #include "libc/intrin/atomic.h"
#include "libc/intrin/describeflags.h"
#include "libc/intrin/strace.h"
#include "libc/intrin/ulock.h" #include "libc/intrin/ulock.h"
#include "libc/intrin/weaken.h"
#include "libc/limits.h" #include "libc/limits.h"
#include "libc/macros.h"
#include "libc/mem/alloca.h" #include "libc/mem/alloca.h"
#include "libc/nt/enum/processcreationflags.h" #include "libc/nt/enum/processcreationflags.h"
#include "libc/nt/runtime.h" #include "libc/nt/runtime.h"
#include "libc/nt/signals.h"
#include "libc/nt/synchronization.h" #include "libc/nt/synchronization.h"
#include "libc/nt/thread.h" #include "libc/nt/thread.h"
#include "libc/nt/thunk/msabi.h" #include "libc/nt/thunk/msabi.h"
#include "libc/runtime/internal.h"
#include "libc/runtime/runtime.h" #include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/runtime/syslib.internal.h" #include "libc/runtime/syslib.internal.h"
#include "libc/sock/internal.h" #include "libc/sock/internal.h"
#include "libc/stdalign.h"
#include "libc/stdio/sysparam.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/arch.h" #include "libc/sysv/consts/arch.h"
#include "libc/sysv/consts/clone.h"
#include "libc/sysv/consts/futex.h"
#include "libc/sysv/consts/nr.h"
#include "libc/sysv/consts/nrlinux.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/freebsd.internal.h" #include "libc/thread/freebsd.internal.h"
#include "libc/thread/openbsd.internal.h" #include "libc/thread/openbsd.internal.h"
#include "libc/thread/posixthread.internal.h" #include "libc/thread/posixthread.internal.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
#include "libc/thread/xnu.internal.h" #include "libc/thread/xnu.internal.h"
#define kMaxThreadIds 32768 #define kMaxThreadIds 32768
@ -79,28 +56,19 @@
#define LWP_SUSPENDED 0x00000080 #define LWP_SUSPENDED 0x00000080
struct CloneArgs { struct CloneArgs {
alignas(16) union { union {
struct { long sp;
atomic_int tid;
int this;
};
int64_t tid64; int64_t tid64;
}; };
atomic_int *ptid; atomic_int *ptid;
atomic_int *ctid; atomic_int *ctid;
atomic_int *ztid;
char *tls; char *tls;
int (*func)(void *, int); int (*func)(void *);
void *arg; void *arg;
long sp;
}; };
int sys_set_tls(uintptr_t, void *); int sys_set_tls(uintptr_t, void *);
int __stack_call(void *, int, long, long, int (*)(void *, int), long); int __stack_call(void *, int, long, long, int (*)(void *), long);
static long AlignStack(long sp, char *stk, long stksz, int mal) {
return sp & -mal;
}
#ifdef __x86_64__ #ifdef __x86_64__
@ -109,7 +77,6 @@ static long AlignStack(long sp, char *stk, long stksz, int mal) {
__msabi extern typeof(ExitThread) *const __imp_ExitThread; __msabi extern typeof(ExitThread) *const __imp_ExitThread;
__msabi extern typeof(GetCurrentThreadId) *const __imp_GetCurrentThreadId; __msabi extern typeof(GetCurrentThreadId) *const __imp_GetCurrentThreadId;
__msabi extern typeof(TlsSetValue) *const __imp_TlsSetValue;
__msabi extern typeof(WakeByAddressAll) *const __imp_WakeByAddressAll; __msabi extern typeof(WakeByAddressAll) *const __imp_WakeByAddressAll;
textwindows dontinstrument wontreturn static void // textwindows dontinstrument wontreturn static void //
@ -117,51 +84,45 @@ WinThreadEntry(int rdi, // rcx
int rsi, // rdx int rsi, // rdx
int rdx, // r8 int rdx, // r8
struct CloneArgs *wt) { // r9 struct CloneArgs *wt) { // r9
int rc; __set_tls_win32(wt->tls);
if (wt->tls)
__set_tls_win32(wt->tls);
int tid = __imp_GetCurrentThreadId(); int tid = __imp_GetCurrentThreadId();
atomic_int *ctid = wt->ctid;
atomic_init(ctid, tid);
atomic_init(wt->ptid, tid); atomic_init(wt->ptid, tid);
atomic_init(wt->ctid, tid); int rc = __stack_call(wt->arg, tid, 0, 0, wt->func, wt->sp);
rc = __stack_call(wt->arg, wt->tid, 0, 0, wt->func, wt->sp);
// we can now clear ctid directly since we're no longer using our own // we can now clear ctid directly since we're no longer using our own
// stack memory, which can now be safely free'd by the parent thread. // stack memory, which can now be safely free'd by the parent thread.
atomic_store_explicit(wt->ztid, 0, memory_order_release); atomic_store_explicit(ctid, 0, memory_order_release);
__imp_WakeByAddressAll(wt->ztid); __imp_WakeByAddressAll(ctid);
// since we didn't indirect this function through NT2SYSV() it's not // since we didn't indirect this function through NT2SYSV() it's not
// safe to simply return, and as such, we need ExitThread(). // safe to simply return, and as such, we need ExitThread().
__imp_ExitThread(rc); __imp_ExitThread(rc);
__builtin_unreachable(); __builtin_unreachable();
} }
static textwindows errno_t CloneWindows(int (*func)(void *, int), char *stk, textwindows static errno_t CloneWindows(int (*func)(void *), char *stk,
size_t stksz, int flags, void *arg, size_t stksz, void *arg, void *tls,
void *tls, atomic_int *ptid, atomic_int *ptid, atomic_int *ctid) {
atomic_int *ctid) {
long sp; long sp;
int64_t h; int64_t h;
intptr_t tip;
uint32_t utid; uint32_t utid;
struct CloneArgs *wt; struct CloneArgs *wt;
sp = (intptr_t)stk + stksz; sp = tip = (intptr_t)stk + stksz;
sp = AlignStack(sp, stk, stksz, 16);
sp -= sizeof(struct CloneArgs); sp -= sizeof(struct CloneArgs);
sp &= -alignof(struct CloneArgs); sp &= -alignof(struct CloneArgs);
wt = (struct CloneArgs *)sp; wt = (struct CloneArgs *)sp;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; wt->ctid = ctid;
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ptid = ptid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->func = func; wt->func = func;
wt->arg = arg; wt->arg = arg;
wt->tls = flags & CLONE_SETTLS ? tls : 0; wt->tls = tls;
wt->sp = sp; wt->sp = tip & -16;
if ((h = CreateThread(&kNtIsInheritable, 65536, (void *)WinThreadEntry, wt, if ((h = CreateThread(&kNtIsInheritable, 65536, (void *)WinThreadEntry, wt,
kNtStackSizeParamIsAReservation, &utid))) { kNtStackSizeParamIsAReservation, &utid))) {
if (flags & CLONE_PARENT_SETTID) atomic_init(ptid, utid);
atomic_init(ptid, utid); struct CosmoTib *tib = tls;
if (flags & CLONE_SETTLS) { atomic_store_explicit(&tib->tib_syshand, h, memory_order_release);
struct CosmoTib *tib = tls;
atomic_store_explicit(&tib->tib_syshand, h, memory_order_release);
}
return 0; return 0;
} else { } else {
return __dos2errno(GetLastError()); return __dos2errno(GetLastError());
@ -185,37 +146,33 @@ asm("XnuThreadThunk:\n\t"
".size\tXnuThreadThunk,.-XnuThreadThunk"); ".size\tXnuThreadThunk,.-XnuThreadThunk");
__attribute__((__used__)) __attribute__((__used__))
static dontinstrument wontreturn void dontinstrument wontreturn static void
XnuThreadMain(void *pthread, // rdi XnuThreadMain(void *pthread, // rdi
int tid, // rsi int tid, // rsi
int (*func)(void *arg, int tid), // rdx int (*func)(void *arg), // rdx
void *arg, // rcx void *arg, // rcx
struct CloneArgs *wt, // r8 struct CloneArgs *wt, // r8
unsigned xnuflags) { // r9 unsigned xnuflags) { // r9
int ax;
wt->tid = tid;
atomic_init(wt->ctid, tid); atomic_init(wt->ctid, tid);
atomic_init(wt->ptid, tid); atomic_init(wt->ptid, tid);
if (wt->tls) { // XNU uses the same 0x30 offset as the WIN32 TIB x64. They told the
// XNU uses the same 0x30 offset as the WIN32 TIB x64. They told the // Go team at Google that they Apply stands by our ability to use it
// Go team at Google that they Apply stands by our ability to use it // https://github.com/golang/go/issues/23617#issuecomment-376662373
// https://github.com/golang/go/issues/23617#issuecomment-376662373 int ax;
asm volatile("syscall" asm volatile("syscall"
: "=a"(ax) : "=a"(ax)
: "0"(__NR_thread_fast_set_cthread_self), "D"(wt->tls - 0x30) : "0"(__NR_thread_fast_set_cthread_self), "D"(wt->tls - 0x30)
: "rcx", "rdx", "r8", "r9", "r10", "r11", "memory", "cc"); : "rcx", "rdx", "r8", "r9", "r10", "r11", "memory", "cc");
}
func(arg, tid); func(arg);
// we no longer use the stack after this point // we no longer use the stack after this point
// %rax = int bsdthread_terminate(%rdi = void *stackaddr, // %rax = int bsdthread_terminate(%rdi = void *stackaddr,
// %rsi = size_t freesize, // %rsi = size_t freesize,
// %rdx = uint32_t port, // %rdx = uint32_t port,
// %r10 = uint32_t sem); // %r10 = uint32_t sem);
asm volatile("movl\t$0,(%%rsi)\n\t" // *wt->ztid = 0 asm volatile("movl\t$0,(%%rsi)\n\t" // *wt->ctid = 0
"mov\t$0x101,%%edi\n\t" // wake all "mov\t$0x101,%%edi\n\t" // wake all
"xor\t%%edx,%%edx\n\t" // wake_value "xor\t%%edx,%%edx\n\t" // wake_value
"mov\t$0x02000204,%%eax\n\t" // ulock_wake() "mov\t$0x02000204,%%eax\n\t" // ulock_wake()
@ -227,19 +184,18 @@ XnuThreadMain(void *pthread, // rdi
"mov\t$0x02000169,%%eax\n\t" // bsdthread_terminate() "mov\t$0x02000169,%%eax\n\t" // bsdthread_terminate()
"syscall" "syscall"
: /* no outputs */ : /* no outputs */
: "S"(wt->ztid) : "S"(wt->ctid)
: "rax", "rcx", "r10", "r11", "memory"); : "rax", "rcx", "r10", "r11", "memory");
__builtin_unreachable(); __builtin_unreachable();
} }
static errno_t CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags, static errno_t CloneXnu(int (*fn)(void *), char *stk, size_t stksz, void *arg,
void *arg, void *tls, atomic_int *ptid, void *tls, atomic_int *ptid, atomic_int *ctid) {
atomic_int *ctid) {
// perform this weird mandatory system call once // perform this weird mandatory system call once
static bool once; static bool once;
if (!once) { if (!once) {
npassert(sys_bsdthread_register(XnuThreadThunk, 0, 0, 0, 0, 0, 0) != -1); sys_bsdthread_register(XnuThreadThunk, 0, 0, 0, 0, 0, 0);
once = true; once = true;
} }
@ -247,16 +203,15 @@ static errno_t CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
long sp; long sp;
struct CloneArgs *wt; struct CloneArgs *wt;
sp = (intptr_t)stk + stksz; sp = (intptr_t)stk + stksz;
sp = AlignStack(sp, stk, stksz, 16);
sp -= sizeof(struct CloneArgs); sp -= sizeof(struct CloneArgs);
sp &= -alignof(struct CloneArgs); sp &= -alignof(struct CloneArgs);
wt = (struct CloneArgs *)sp; wt = (struct CloneArgs *)sp;
sp &= -16;
// pass parameters to new thread via xnu // pass parameters to new thread via xnu
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; wt->ctid = ctid;
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ptid = ptid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->tls = tls;
wt->tls = flags & CLONE_SETTLS ? tls : 0;
return sys_clone_xnu(fn, arg, wt, 0, PTHREAD_START_CUSTOM_XNU); return sys_clone_xnu(fn, arg, wt, 0, PTHREAD_START_CUSTOM_XNU);
} }
@ -267,25 +222,25 @@ static errno_t CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
// 1. __asan_handle_no_return wipes stack [todo?] // 1. __asan_handle_no_return wipes stack [todo?]
relegated dontinstrument wontreturn static void OpenbsdThreadMain(void *p) { relegated dontinstrument wontreturn static void OpenbsdThreadMain(void *p) {
struct CloneArgs *wt = p; struct CloneArgs *wt = p;
atomic_init(wt->ptid, wt->tid); int tid = atomic_load_explicit(wt->ctid, memory_order_relaxed);
atomic_init(wt->ctid, wt->tid); atomic_init(wt->ptid, tid);
wt->func(wt->arg, wt->tid); wt->func(wt->arg);
asm volatile("mov\t%2,%%rsp\n\t" // so syscall can validate stack exists asm volatile("mov\t%1,%%rsp\n\t" // so syscall can validate stack exists
"movl\t$0,(%%rdi)\n\t" // *wt->ztid = 0 (old stack now free'd) "movl\t$0,(%2)\n\t" // *wt->ctid = 0 (old stack now free'd)
"syscall\n\t" // futex(int*, op, val) will wake wait0 "syscall\n\t" // futex(int*, op, val) will wake wait0
"xor\t%%edi,%%edi\n\t" // so kernel doesn't write to old stack "xor\t%%edi,%%edi\n\t" // so kernel doesn't write to old stack
"mov\t$302,%%eax\n\t" // __threxit(int *notdead) doesn't wake "mov\t$302,%%eax\n\t" // __threxit(int *notdead) doesn't wake
"syscall" "syscall"
: "=m"(*wt->ztid) : /* no outputs */
: "a"(83), "m"(__oldstack), "D"(wt->ztid), : "a"(83), "m"(__oldstack), "D"(wt->ctid),
"S"(2 /* FUTEX_WAKE */), "d"(INT_MAX) "S"(2 /* FUTEX_WAKE */), "d"(INT_MAX)
: "rcx", "r11", "memory"); : "rcx", "r11", "memory");
__builtin_unreachable(); __builtin_unreachable();
} }
relegated errno_t CloneOpenbsd(int (*func)(void *, int), char *stk, relegated static errno_t CloneOpenbsd(int (*func)(void *), char *stk,
size_t stksz, int flags, void *arg, void *tls, size_t stksz, void *arg, void *tls,
atomic_int *ptid, atomic_int *ctid) { atomic_int *ptid, atomic_int *ctid) {
int rc; int rc;
intptr_t sp; intptr_t sp;
struct __tfork *tf; struct __tfork *tf;
@ -297,18 +252,18 @@ relegated errno_t CloneOpenbsd(int (*func)(void *, int), char *stk,
sp -= sizeof(struct CloneArgs); sp -= sizeof(struct CloneArgs);
sp &= -alignof(struct CloneArgs); sp &= -alignof(struct CloneArgs);
wt = (struct CloneArgs *)sp; wt = (struct CloneArgs *)sp;
sp = AlignStack(sp, stk, stksz, 16); sp &= -16;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; sp -= 8;
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; *(intptr_t *)sp = (intptr_t)CloneOpenbsd + 1;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid; wt->ctid = ctid;
wt->ptid = ptid;
wt->arg = arg; wt->arg = arg;
wt->func = func; wt->func = func;
tf->tf_stack = (char *)sp - 8; tf->tf_stack = (char *)sp;
tf->tf_tcb = flags & CLONE_SETTLS ? tls : 0; tf->tf_tcb = tls;
tf->tf_tid = &wt->tid; tf->tf_tid = ctid;
if ((rc = __tfork_thread(tf, sizeof(*tf), OpenbsdThreadMain, wt)) >= 0) { if ((rc = __tfork_thread(tf, sizeof(*tf), OpenbsdThreadMain, wt)) >= 0) {
if (flags & CLONE_PARENT_SETTID) atomic_init(ptid, rc);
atomic_init(ptid, rc);
return 0; return 0;
} else { } else {
return -rc; return -rc;
@ -319,35 +274,30 @@ relegated errno_t CloneOpenbsd(int (*func)(void *, int), char *stk,
// NET BESIYATA DISHMAYA // NET BESIYATA DISHMAYA
wontreturn dontinstrument static void NetbsdThreadMain( wontreturn dontinstrument static void NetbsdThreadMain(
void *arg, // rdi void *arg, // rdi
int (*func)(void *, int), // rsi int (*func)(void *), // rsi
int flags, // rdx atomic_int *ctid, // rdx
atomic_int *ctid, // rcx atomic_int *ptid) { // rcx
atomic_int *ptid) { // r8 int ax;
int ax, dx; asm("syscall"
static atomic_int clobber; : "=a"(ax) // man says always succeeds
atomic_int *ztid = &clobber; : "0"(311) // _lwp_self()
ax = sys_gettid(); : "rcx", "rdx", "r8", "r9", "r10", "r11", "memory", "cc");
if (flags & CLONE_CHILD_SETTID) atomic_init(ctid, ax);
atomic_init(ctid, ax); atomic_init(ptid, ax);
if (flags & CLONE_PARENT_SETTID) func(arg);
atomic_init(ptid, ax);
if (flags & CLONE_CHILD_CLEARTID)
ztid = ctid;
func(arg, ax);
// we no longer use the stack after this point // we no longer use the stack after this point
// %eax = int __lwp_exit(void); // %eax = int __lwp_exit(void);
asm volatile("movl\t$0,%2\n\t" // *ztid = 0 asm volatile("movl\t$0,(%2)\n\t" // *ztid = 0
"syscall" // __lwp_exit() "syscall" // __lwp_exit()
: "=a"(ax), "=d"(dx), "=m"(*ztid) : "=a"(ax)
: "0"(310) : "0"(310), "r"(ctid)
: "rcx", "r11", "memory"); : "rcx", "r11", "memory");
__builtin_unreachable(); __builtin_unreachable();
} }
static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz, static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, void *arg,
int flags, void *arg, void *tls, atomic_int *ptid, void *tls, atomic_int *ptid, atomic_int *ctid) {
atomic_int *ctid) {
// NetBSD has its own clone() and it works, but it's technically a // NetBSD has its own clone() and it works, but it's technically a
// second-class API, intended to help Linux folks migrate to this. // second-class API, intended to help Linux folks migrate to this.
int ax; int ax;
@ -363,13 +313,12 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
: CFLAG_CONSTRAINT(failed), "=a"(ax) : CFLAG_CONSTRAINT(failed), "=a"(ax)
: "1"(__NR_getcontext_netbsd), "D"(&netbsd_clone_template) : "1"(__NR_getcontext_netbsd), "D"(&netbsd_clone_template)
: "rcx", "rdx", "r8", "r9", "r10", "r11", "memory"); : "rcx", "rdx", "r8", "r9", "r10", "r11", "memory");
npassert(!failed);
once = true; once = true;
} }
sp = (intptr_t)stk + stksz; sp = (intptr_t)stk + stksz;
// align the stack // align the stack
sp = AlignStack(sp, stk, stksz, 16); sp &= -16;
// simulate call to misalign stack and ensure backtrace looks good // simulate call to misalign stack and ensure backtrace looks good
sp -= 8; sp -= 8;
@ -377,8 +326,7 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
// place the giant 784 byte ucontext structure in the red zone! // place the giant 784 byte ucontext structure in the red zone!
// it only has to live long enough for the thread to come alive // it only has to live long enough for the thread to come alive
ctx = (struct ucontext_netbsd *)((sp - sizeof(struct ucontext_netbsd)) & ctx = (struct ucontext_netbsd *)((sp - sizeof(struct ucontext_netbsd)) & -64);
-alignof(struct ucontext_netbsd));
// pass parameters in process state // pass parameters in process state
memcpy(ctx, &netbsd_clone_template, sizeof(*ctx)); memcpy(ctx, &netbsd_clone_template, sizeof(*ctx));
@ -388,17 +336,14 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
ctx->uc_mcontext.rip = (intptr_t)NetbsdThreadMain; ctx->uc_mcontext.rip = (intptr_t)NetbsdThreadMain;
ctx->uc_mcontext.rdi = (intptr_t)arg; ctx->uc_mcontext.rdi = (intptr_t)arg;
ctx->uc_mcontext.rsi = (intptr_t)func; ctx->uc_mcontext.rsi = (intptr_t)func;
ctx->uc_mcontext.rdx = flags; ctx->uc_mcontext.rdx = (intptr_t)ctid;
ctx->uc_mcontext.rcx = (intptr_t)ctid; ctx->uc_mcontext.rcx = (intptr_t)ptid;
ctx->uc_mcontext.r8 = (intptr_t)ptid;
ctx->uc_flags |= _UC_STACK; ctx->uc_flags |= _UC_STACK;
ctx->uc_stack.ss_sp = stk; ctx->uc_stack.ss_sp = stk;
ctx->uc_stack.ss_size = stksz; ctx->uc_stack.ss_size = stksz;
ctx->uc_stack.ss_flags = 0; ctx->uc_stack.ss_flags = 0;
if (flags & CLONE_SETTLS) { ctx->uc_flags |= _UC_TLSBASE;
ctx->uc_flags |= _UC_TLSBASE; ctx->uc_mcontext._mc_tlsbase = (intptr_t)tls;
ctx->uc_mcontext._mc_tlsbase = (intptr_t)tls;
}
// perform the system call // perform the system call
int tid = 0; int tid = 0;
@ -407,9 +352,7 @@ static int CloneNetbsd(int (*func)(void *, int), char *stk, size_t stksz,
: "1"(__NR__lwp_create), "D"(ctx), "S"(LWP_DETACHED), "2"(&tid) : "1"(__NR__lwp_create), "D"(ctx), "S"(LWP_DETACHED), "2"(&tid)
: "rcx", "r8", "r9", "r10", "r11", "memory"); : "rcx", "r8", "r9", "r10", "r11", "memory");
if (!failed) { if (!failed) {
unassert(tid); atomic_init(ptid, tid);
if (flags & CLONE_PARENT_SETTID)
atomic_init(ptid, tid);
return 0; return 0;
} else { } else {
return ax; return ax;
@ -428,35 +371,35 @@ wontreturn dontinstrument static void FreebsdThreadMain(void *p) {
#elif defined(__x86_64__) #elif defined(__x86_64__)
sys_set_tls(AMD64_SET_GSBASE, wt->tls); sys_set_tls(AMD64_SET_GSBASE, wt->tls);
#endif #endif
atomic_init(wt->ctid, wt->tid); atomic_init(wt->ctid, wt->tid64);
atomic_init(wt->ptid, wt->tid); atomic_init(wt->ptid, wt->tid64);
wt->func(wt->arg, wt->tid); wt->func(wt->arg);
// we no longer use the stack after this point // we no longer use the stack after this point
// void thr_exit(%rdi = long *state); // void thr_exit(%rdi = long *state);
#ifdef __x86_64__ #ifdef __x86_64__
asm volatile("movl\t$0,%0\n\t" // *wt->ztid = 0 asm volatile("movl\t$0,%0\n\t" // *wt->ctid = 0
"syscall\n\t" // _umtx_op(wt->ztid, WAKE, INT_MAX) "syscall\n\t" // _umtx_op(wt->ctid, WAKE, INT_MAX)
"movl\t$431,%%eax\n\t" // thr_exit(long *nonzeroes_and_wake) "movl\t$431,%%eax\n\t" // thr_exit(long *nonzeroes_and_wake)
"xor\t%%edi,%%edi\n\t" // sad we can't use this free futex op "xor\t%%edi,%%edi\n\t" // sad we can't use this free futex op
"syscall\n\t" // thr_exit() fails if thread is orphaned "syscall\n\t" // thr_exit() fails if thread is orphaned
"movl\t$1,%%eax\n\t" // _exit() "movl\t$1,%%eax\n\t" // _exit()
"syscall" // "syscall" //
: "=m"(*wt->ztid) : "=m"(*wt->ctid)
: "a"(454), "D"(wt->ztid), "S"(UMTX_OP_WAKE), "d"(INT_MAX) : "a"(454), "D"(wt->ctid), "S"(UMTX_OP_WAKE), "d"(INT_MAX)
: "rcx", "r8", "r9", "r10", "r11", "memory"); : "rcx", "r8", "r9", "r10", "r11", "memory");
#elif defined(__aarch64__) #elif defined(__aarch64__)
register long x0 asm("x0") = (long)wt->ztid; register long x0 asm("x0") = (long)wt->ctid;
register long x1 asm("x1") = UMTX_OP_WAKE; register long x1 asm("x1") = UMTX_OP_WAKE;
register long x2 asm("x2") = INT_MAX; register long x2 asm("x2") = INT_MAX;
register long x8 asm("x8") = 454; // _umtx_op register long x8 asm("x8") = 454; // _umtx_op
asm volatile("str\twzr,%0\n\t" // *wt->ztid = 0 asm volatile("str\twzr,%0\n\t" // *wt->ctid = 0
"svc\t0\n\t" // _umtx_op(wt->ztid, WAKE, INT_MAX) "svc\t0\n\t" // _umtx_op(wt->ctid, WAKE, INT_MAX)
"mov\tx0,#0\n\t" // arg0 = 0 "mov\tx0,#0\n\t" // arg0 = 0
"mov\tx8,#431\n\t" // thr_exit "mov\tx8,#431\n\t" // thr_exit
"svc\t0\n\t" // thr_exit(long *nonzeroes_and_wake = 0) "svc\t0\n\t" // thr_exit(long *nonzeroes_and_wake = 0)
"mov\tx8,#1\n\t" // _exit "mov\tx8,#1\n\t" // _exit
"svc\t0" // _exit(long *nonzeroes_and_wake = 0) "svc\t0" // _exit(long *nonzeroes_and_wake = 0)
: "=m"(*wt->ztid) : "=m"(*wt->ctid)
: "r"(x0), "r"(x1), "r"(x2), "r"(x8)); : "r"(x0), "r"(x1), "r"(x2), "r"(x8));
#else #else
#error "unsupported architecture" #error "unsupported architecture"
@ -464,20 +407,19 @@ wontreturn dontinstrument static void FreebsdThreadMain(void *p) {
__builtin_unreachable(); __builtin_unreachable();
} }
static errno_t CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz, static errno_t CloneFreebsd(int (*func)(void *), char *stk, size_t stksz,
int flags, void *arg, void *tls, atomic_int *ptid, void *arg, void *tls, atomic_int *ptid,
atomic_int *ctid) { atomic_int *ctid) {
long sp; long sp;
int64_t tid; int64_t tid64;
struct CloneArgs *wt; struct CloneArgs *wt;
sp = (intptr_t)stk + stksz; sp = (intptr_t)stk + stksz;
sp -= sizeof(struct CloneArgs); sp -= sizeof(struct CloneArgs);
sp &= -alignof(struct CloneArgs); sp &= -alignof(struct CloneArgs);
wt = (struct CloneArgs *)sp; wt = (struct CloneArgs *)sp;
sp = AlignStack(sp, stk, stksz, 16); sp &= -16;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid; wt->ctid = ctid;
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid; wt->ptid = ptid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->tls = tls; wt->tls = tls;
wt->func = func; wt->func = func;
wt->arg = arg; wt->arg = arg;
@ -486,10 +428,10 @@ static errno_t CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz,
.arg = wt, .arg = wt,
.stack_base = stk, .stack_base = stk,
.stack_size = sp - (long)stk, .stack_size = sp - (long)stk,
.tls_base = flags & CLONE_SETTLS ? tls : 0, .tls_base = tls,
.tls_size = 64, .tls_size = 64,
.child_tid = &wt->tid64, .child_tid = &wt->tid64,
.parent_tid = &tid, .parent_tid = &tid64,
}; };
#ifdef __x86_64__ #ifdef __x86_64__
int ax; int ax;
@ -510,8 +452,7 @@ static errno_t CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz,
#else #else
#error "unsupported architecture" #error "unsupported architecture"
#endif #endif
if (flags & CLONE_PARENT_SETTID) atomic_init(ptid, tid64);
atomic_init(ptid, tid);
return 0; return 0;
} }
@ -522,57 +463,57 @@ static errno_t CloneFreebsd(int (*func)(void *, int), char *stk, size_t stksz,
dontinstrument static void *SiliconThreadMain(void *arg) { dontinstrument static void *SiliconThreadMain(void *arg) {
struct CloneArgs *wt = arg; struct CloneArgs *wt = arg;
atomic_int *ctid = wt->ctid;
int tid = atomic_load_explicit(ctid, memory_order_relaxed);
asm volatile("mov\tx28,%0" : /* no outputs */ : "r"(wt->tls)); asm volatile("mov\tx28,%0" : /* no outputs */ : "r"(wt->tls));
atomic_init(wt->ctid, wt->this); __stack_call(wt->arg, tid, 0, 0, wt->func, wt->sp);
atomic_init(wt->ptid, wt->this); atomic_store_explicit(ctid, 0, memory_order_release);
__stack_call(wt->arg, wt->this, 0, 0, wt->func, wt->sp); ulock_wake(UL_COMPARE_AND_WAIT | ULF_WAKE_ALL, ctid, 0);
atomic_store_explicit(wt->ztid, 0, memory_order_release);
ulock_wake(UL_COMPARE_AND_WAIT | ULF_WAKE_ALL, wt->ztid, 0);
return 0; return 0;
} }
static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz, static errno_t CloneSilicon(int (*fn)(void *), char *stk, size_t stksz,
int flags, void *arg, void *tls, atomic_int *ptid, void *arg, void *tls, atomic_int *ptid,
atomic_int *ctid) { atomic_int *ctid) {
long sp;
void *attr; // assign tid to new thread
errno_t res;
unsigned tid;
pthread_t th;
size_t babystack;
struct CloneArgs *wt;
static atomic_uint tids; static atomic_uint tids;
sp = (intptr_t)stk + stksz; unsigned tid = atomic_fetch_add_explicit(&tids, 1, memory_order_relaxed);
tid %= kMaxThreadIds;
tid += kMinThreadId;
atomic_init(ctid, tid);
atomic_init(ptid, tid);
// pass temp data on stack
intptr_t sp, tip;
struct CloneArgs *wt;
sp = tip = (intptr_t)stk + stksz;
sp -= sizeof(struct CloneArgs); sp -= sizeof(struct CloneArgs);
sp &= -alignof(struct CloneArgs); sp &= -alignof(struct CloneArgs);
wt = (struct CloneArgs *)sp; wt = (struct CloneArgs *)sp;
sp = AlignStack(sp, stk, stksz, 16);
tid = atomic_fetch_add_explicit(&tids, 1, memory_order_acq_rel);
wt->this = tid = (tid % kMaxThreadIds) + kMinThreadId;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ptid = flags & CLONE_PARENT_SETTID ? ptid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->tls = flags & CLONE_SETTLS ? tls : 0;
wt->func = fn; wt->func = fn;
wt->arg = arg; wt->arg = arg;
wt->sp = sp; wt->tls = tls;
babystack = __syslib->__pthread_stack_min; wt->ctid = ctid;
wt->sp = tip & -16;
// ask apple libc to spawn thread
errno_t res;
pthread_t th;
size_t babystack = __syslib->__pthread_stack_min;
#pragma GCC push_options #pragma GCC push_options
#pragma GCC diagnostic ignored "-Walloca-larger-than=" #pragma GCC diagnostic ignored "-Walloca-larger-than="
attr = alloca(__syslib->__sizeof_pthread_attr_t); void *attr = alloca(__syslib->__sizeof_pthread_attr_t);
#pragma GCC pop_options #pragma GCC pop_options
unassert(!__syslib->__pthread_attr_init(attr)); __syslib->__pthread_attr_init(attr);
unassert(!__syslib->__pthread_attr_setguardsize(attr, 0)); __syslib->__pthread_attr_setguardsize(attr, 0);
unassert(!__syslib->__pthread_attr_setstacksize(attr, babystack)); __syslib->__pthread_attr_setstacksize(attr, babystack);
if (!(res = __syslib->__pthread_create(&th, attr, SiliconThreadMain, wt))) { if (!(res = __syslib->__pthread_create(&th, attr, SiliconThreadMain, wt))) {
if (flags & CLONE_PARENT_SETTID) atomic_init(ptid, tid);
atomic_init(ptid, tid); struct CosmoTib *tib = tls;
if (flags & CLONE_SETTLS) { atomic_store_explicit(&tib[-1].tib_syshand, th, memory_order_release);
struct CosmoTib *tib = tls;
atomic_store_explicit(&tib[-1].tib_syshand, th, memory_order_release);
}
} }
unassert(!__syslib->__pthread_attr_destroy(attr)); __syslib->__pthread_attr_destroy(attr);
return res; return res;
} }
@ -582,10 +523,9 @@ static errno_t CloneSilicon(int (*fn)(void *, int), char *stk, size_t stksz,
// GNU/SYSTEMD // GNU/SYSTEMD
struct LinuxCloneArgs { struct LinuxCloneArgs {
int (*func)(void *, int); int (*func)(void *);
void *arg; void *arg;
char *tls; char *tls;
atomic_int ctid;
}; };
int sys_clone_linux(int flags, // rdi int sys_clone_linux(int flags, // rdi
@ -596,44 +536,32 @@ int sys_clone_linux(int flags, // rdi
void *func, // r9 void *func, // r9
void *arg); // 8(rsp) void *arg); // 8(rsp)
dontinstrument static int LinuxThreadEntry(void *arg, int tid) { dontinstrument static int AmdLinuxThreadEntry(void *arg) {
struct LinuxCloneArgs *wt = arg; struct LinuxCloneArgs *wt = arg;
#if defined(__x86_64__)
sys_set_tls(ARCH_SET_GS, wt->tls); sys_set_tls(ARCH_SET_GS, wt->tls);
#endif return wt->func(wt->arg);
return wt->func(wt->arg, tid);
} }
static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz, static int CloneLinux(int (*func)(void *), char *stk, size_t stksz, int flags,
int flags, void *arg, void *tls, atomic_int *ptid, void *arg, void *tls, atomic_int *ptid,
atomic_int *ctid) { atomic_int *ctid) {
int rc; long sp = (intptr_t)stk + stksz;
long sp;
struct LinuxCloneArgs *wt; #if defined(__x86_64__)
sp = (intptr_t)stk + stksz;
sp -= sizeof(struct LinuxCloneArgs); sp -= sizeof(struct LinuxCloneArgs);
sp &= -alignof(struct LinuxCloneArgs); sp &= -alignof(struct LinuxCloneArgs);
wt = (struct LinuxCloneArgs *)sp; struct LinuxCloneArgs *wt = (struct LinuxCloneArgs *)sp;
// align the stack sp &= -16; // align the stack
#ifdef __aarch64__ wt->arg = arg;
sp = AlignStack(sp, stk, stksz, 128); // for kernel <=4.6 wt->tls = tls;
#else wt->func = func;
sp = AlignStack(sp, stk, stksz, 16); func = AmdLinuxThreadEntry;
arg = wt;
#elif defined(__aarch64__)
sp &= -128; // for kernels <=4.6
#endif #endif
#ifdef __x86_64__
if (flags & CLONE_SETTLS) { int rc;
flags &= ~CLONE_SETTLS;
wt->arg = arg;
wt->tls = tls;
wt->func = func;
func = LinuxThreadEntry;
arg = wt;
}
#endif
if (~flags & CLONE_CHILD_SETTID) {
flags |= CLONE_CHILD_SETTID;
ctid = &wt->ctid;
}
if ((rc = sys_clone_linux(flags, sp, ptid, ctid, tls, func, arg)) >= 0) { if ((rc = sys_clone_linux(flags, sp, ptid, ctid, tls, func, arg)) >= 0) {
// clone() is documented as setting ptid before return // clone() is documented as setting ptid before return
return 0; return 0;
@ -646,110 +574,9 @@ static int CloneLinux(int (*func)(void *arg, int rc), char *stk, size_t stksz,
// COSMOPOLITAN // COSMOPOLITAN
/** /**
* Creates thread without malloc being linked. * Creates thread without malloc() being linked.
* *
* If you use clone() you're on your own. Example: * If you use clone() you're on your own.
*
* int worker(void *arg) { return 0; }
* struct CosmoTib tib = {.tib_self = &tib, .tib_ctid = -1};
* atomic_int tid;
* char *stk = NewCosmoStack();
* clone(worker, stk, GetStackSize() - 16,
* CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES |
* CLONE_SYSVSEM | CLONE_SIGHAND | CLONE_PARENT_SETTID |
* CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS,
* arg, &tid, &tib, &tib.tib_tid);
* while (atomic_load(&tid) == 0) sched_yield();
* // thread is known
* while (atomic_load(&tib.tib_ctid) < 0) sched_yield();
* // thread is running
* while (atomic_load(&tib.tib_ctid) > 0) sched_yield();
* // thread has terminated
* FreeCosmoStack(stk);
*
* Threads are created in a detached manner. They currently can't be
* synchronized using wait() or posix signals. Threads created by this
* function should be synchronized using shared memory operations.
*
* Any memory that's required by this system call wrapper is allocated
* to the top of your stack. This shouldn't be more than 128 bytes.
*
* Your function is called from within the stack you specify. A return
* address is pushed onto your stack, that causes returning to jump to
* _Exit1() which terminates the thread. Even though the callback says
* it supports a return code, that'll only work on Linux and Windows.
*
* This function follows the same ABI convention as the Linux userspace
* libraries, with a few small changes. The varargs has been removed to
* help prevent broken code, and the stack size and tls size parameters
* are introduced for compatibility with FreeBSD.
*
* To keep this system call lightweight, only the thread creation use
* case is polyfilled across platforms. For example, if you want fork
* that works on OpenBSD for example, don't do it with clone(SIGCHLD)
* and please just call fork(). Even if you do that on Linux, it will
* effectively work around libc features like atfork(), so that means
* other calls like getpid() may return incorrect values.
*
* @param func is your callback function, which this wrapper requires
* not be null, otherwise EINVAL is raised. It is passed two args
* within the child thread: (1) the caller-supplied `arg` and (2)
* the new tid is always passed in the second arg for convenience
*
* @param stk points to the bottom of a caller allocated stack, which
* must be allocated via mmap() using the MAP_STACK flag, or else
* you won't get optimal performance and it won't work on OpenBSD
*
* @param stksz is the size of that stack in bytes, we recommend that
* that this be set to GetStackSize() or else memory safety tools
* like kprintf() can't do as good and quick of a job; this value
* must be 16-aligned plus it must be at least 4192 bytes in size
* and it's advised to have the bottom-most page, be a guard page
*
* @param flags which SHOULD always have all of these flags:
*
* - `CLONE_THREAD`
* - `CLONE_VM`
* - `CLONE_FS`
* - `CLONE_FILES`
* - `CLONE_SIGHAND`
* - `CLONE_SYSVSEM`
*
* This system call wrapper is intended for threads, and as such, we
* won't polyfill Linux's ability to simulate unrelated calls (e.g.
* fork, vfork) via clone() on other platforms. Please just call
* fork() and vfork() when that's what you want.
*
* Your `flags` may also optionally also additionally bitwise-OR any
* combination of the following additional flags:
*
* - `CLONE_CHILD_SETTID` must be specified if you intend to set the
* `ctid` argument, which will updated with the child tid once the
* child has started.
*
* - `CLONE_PARENT_SETTID` must be specified if you intend to set
* the `ptid` argument, and it is updated at the most opportune
* moment. On all platforms except XNU x86, this happens before
* clone() returns. But since it might not be available yet you
* need to use pthread_getunique_np() to obtain it.
*
* - `CLONE_CHILD_CLEARTID` causes `*ctid = 0` upon child thread
* termination. This is used to implement join so that the parent
* may know when it's safe to free the child's stack memory, and
* as such, is guaranteed to happen AFTER the child thread has
* either terminated or has finished using its stack memory
*
* - `CLONE_SETTLS` is needed if you intend to specify the `tls`
* argument, which after thread creation may be accessed using
* __get_tls(). Doing this means that `errno`, gettid(), etc.
* correctly work. Caveat emptor if you choose not to do this.
*
* @param arg is passed as an argument to `func` in the child thread
* @param tls may be used to set the thread local storage segment;
* this parameter is ignored if `CLONE_SETTLS` is not set
* @param ctid lets the child receive its thread id without having to
* call gettid() and is ignored if `CLONE_CHILD_SETTID` isn't set
* @return 0 on success, or errno on errno
*/ */
errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg, errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg,
void *ptid, void *tls, void *ctid) { void *ptid, void *tls, void *ctid) {
@ -757,33 +584,25 @@ errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg,
atomic_fetch_add(&_pthread_count, 1); atomic_fetch_add(&_pthread_count, 1);
if (!func) { if (IsLinux()) {
err = EINVAL;
} else if (IsLinux()) {
err = CloneLinux(func, stk, stksz, flags, arg, tls, ptid, ctid); err = CloneLinux(func, stk, stksz, flags, arg, tls, ptid, ctid);
} else if (!IsTiny() &&
(flags & ~(CLONE_SETTLS | CLONE_PARENT_SETTID |
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) !=
(CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND | CLONE_SYSVSEM)) {
err = EINVAL;
} else if (IsXnu()) { } else if (IsXnu()) {
#ifdef __x86_64__ #if defined(__x86_64__)
err = CloneXnu(func, stk, stksz, flags, arg, tls, ptid, ctid); err = CloneXnu(func, stk, stksz, arg, tls, ptid, ctid);
#elif defined(__aarch64__) #elif defined(__aarch64__)
err = CloneSilicon(func, stk, stksz, flags, arg, tls, ptid, ctid); err = CloneSilicon(func, stk, stksz, arg, tls, ptid, ctid);
#else #else
#error "unsupported architecture" #error "unsupported architecture"
#endif #endif
} else if (IsFreebsd()) { } else if (IsFreebsd()) {
err = CloneFreebsd(func, stk, stksz, flags, arg, tls, ptid, ctid); err = CloneFreebsd(func, stk, stksz, arg, tls, ptid, ctid);
#ifdef __x86_64__ #if defined(__x86_64__)
} else if (IsNetbsd()) {
err = CloneNetbsd(func, stk, stksz, flags, arg, tls, ptid, ctid);
} else if (IsOpenbsd()) {
err = CloneOpenbsd(func, stk, stksz, flags, arg, tls, ptid, ctid);
} else if (IsWindows()) { } else if (IsWindows()) {
err = CloneWindows(func, stk, stksz, flags, arg, tls, ptid, ctid); err = CloneWindows(func, stk, stksz, arg, tls, ptid, ctid);
} else if (IsNetbsd()) {
err = CloneNetbsd(func, stk, stksz, arg, tls, ptid, ctid);
} else if (IsOpenbsd()) {
err = CloneOpenbsd(func, stk, stksz, arg, tls, ptid, ctid);
#endif /* __x86_64__ */ #endif /* __x86_64__ */
} else { } else {
err = ENOSYS; err = ENOSYS;
@ -793,7 +612,7 @@ errno_t clone(void *func, void *stk, size_t stksz, int flags, void *arg,
err = EAGAIN; err = EAGAIN;
if (err) if (err)
unassert(atomic_fetch_sub(&_pthread_count, 1) > 1); atomic_fetch_sub(&_pthread_count, 1);
return err; return err;
} }

View file

@ -22,18 +22,25 @@
ftrace_hook: ftrace_hook:
#ifdef __x86_64__ #ifdef __x86_64__
// We need to save saved registers because we have some functions // save argument registers
// like __errno_location which can be called from an inline asm() // we save %rax because __gc() takes it as an argument.
// statement. It's nice to have the flexibility anyway. // we save %r10 because it's used as a syscall argument.
cmpl $0,__ftrace(%rip) cmpl $0,__ftrace(%rip)
jle 1f jle 1f
push %rbp push %rbp
mov %rsp,%rbp mov %rsp,%rbp
and $-16,%rsp and $-16,%rsp
sub $256,%rsp sub $128,%rsp
movdqu %xmm0,-0x80(%rbp)
movdqu %xmm1,-0x70(%rbp)
movdqu %xmm2,-0x60(%rbp)
movdqu %xmm3,-0x50(%rbp)
movdqu %xmm4,-0x40(%rbp)
movdqu %xmm5,-0x30(%rbp)
movdqu %xmm6,-0x20(%rbp)
movdqu %xmm7,-0x10(%rbp)
push %rax push %rax
push %rbx
push %rcx push %rcx
push %rdx push %rdx
push %rdi push %rdi
@ -41,19 +48,15 @@ ftrace_hook:
push %r8 push %r8
push %r9 push %r9
push %r10 push %r10
push %r11
push %r12
push %r13
push %r14
push %r15
call __xmm_save
call ftracer call ftracer
call __xmm_load movdqu -0x80(%rbp),%xmm0
pop %r15 movdqu -0x70(%rbp),%xmm1
pop %r14 movdqu -0x60(%rbp),%xmm2
pop %r13 movdqu -0x50(%rbp),%xmm3
pop %r12 movdqu -0x40(%rbp),%xmm4
pop %r11 movdqu -0x30(%rbp),%xmm5
movdqu -0x20(%rbp),%xmm6
movdqu -0x10(%rbp),%xmm7
pop %r10 pop %r10
pop %r9 pop %r9
pop %r8 pop %r8
@ -61,7 +64,6 @@ ftrace_hook:
pop %rdi pop %rdi
pop %rdx pop %rdx
pop %rcx pop %rcx
pop %rbx
pop %rax pop %rax
leave leave
1: ret 1: ret

View file

@ -31,11 +31,7 @@
#include "libc/thread/tls.h" #include "libc/thread/tls.h"
/** /**
* @fileoverview Plain-text function call logging. * @fileoverview plain-text function call logging
*
* Able to log ~2 million function calls per second, which is mostly
* bottlenecked by system call overhead. Log size is reasonable if piped
* into gzip.
*/ */
#define MAX_NESTING 512 #define MAX_NESTING 512
@ -49,7 +45,7 @@
static struct CosmoFtrace g_ftrace; static struct CosmoFtrace g_ftrace;
__funline int GetNestingLevelImpl(struct StackFrame *frame) { __funline int GetNestingLevelImpl(struct StackFrame *frame) {
int nesting = -2; int nesting = -1;
while (frame && !kisdangerous(frame)) { while (frame && !kisdangerous(frame)) {
++nesting; ++nesting;
frame = frame->next; frame = frame->next;
@ -82,38 +78,63 @@ privileged void ftracer(void) {
struct StackFrame *sf; struct StackFrame *sf;
struct CosmoFtrace *ft; struct CosmoFtrace *ft;
struct PosixThread *pt; struct PosixThread *pt;
// get interesting values
sf = __builtin_frame_address(0); sf = __builtin_frame_address(0);
st = (uintptr_t)__argv - sizeof(uintptr_t); st = (uintptr_t)__argv - sizeof(uintptr_t);
if (__ftrace <= 0) if (__ftrace <= 0)
return; return;
// determine top of stack
// main thread won't consider kernel provided argblock
if (__tls_enabled) { if (__tls_enabled) {
tib = __get_tls_privileged(); tib = __get_tls_privileged();
if (tib->tib_ftrace <= 0) if (tib->tib_ftrace <= 0)
return; return;
ft = &tib->tib_ftracer; ft = &tib->tib_ftracer;
if ((char *)sf >= tib->tib_sigstack_addr && pt = (struct PosixThread *)tib->tib_pthread;
(char *)sf <= tib->tib_sigstack_addr + tib->tib_sigstack_size) { if (pt != &_pthread_static) {
st = (uintptr_t)tib->tib_sigstack_addr + tib->tib_sigstack_size; if ((char *)sf >= tib->tib_sigstack_addr &&
} else if ((pt = (struct PosixThread *)tib->tib_pthread) && (char *)sf <= tib->tib_sigstack_addr + tib->tib_sigstack_size) {
pt->pt_attr.__stacksize) { st = (uintptr_t)tib->tib_sigstack_addr + tib->tib_sigstack_size;
st = (uintptr_t)pt->pt_attr.__stackaddr + pt->pt_attr.__stacksize; } else if (pt && pt->pt_attr.__stacksize) {
st = (uintptr_t)pt->pt_attr.__stackaddr + pt->pt_attr.__stacksize;
}
} }
} else { } else {
ft = &g_ftrace; ft = &g_ftrace;
} }
stackuse = st - (intptr_t)sf;
if (_cmpxchg(&ft->ft_once, false, true)) { // estimate stack pointer of hooked function
uintptr_t usp = (uintptr_t)sf;
usp += sizeof(struct StackFrame); // overhead of this function
#if defined(__x86_64__)
usp += 8; // ftrace_hook() stack aligning
usp += 8 * 8; // ftrace_hook() pushed 8x regs
usp += 8 * 16; // ftrace_hook() pushed 8x xmms
#elif defined(__aarch64__)
usp += 384; // overhead of ftrace_hook()
#else
#error "unsupported architecture"
#endif
// determine how much stack hooked function is using
stackuse = st - usp;
// log function call
//
// FUN $PID $TID $STARTNANOS $STACKUSE $SYMBOL
//
if (!ft->ft_once) {
ft->ft_lastaddr = -1; ft->ft_lastaddr = -1;
ft->ft_skew = GetNestingLevelImpl(sf); ft->ft_skew = GetNestingLevelImpl(sf);
ft->ft_once = true;
} }
if (_cmpxchg(&ft->ft_noreentry, false, true)) { sf = sf->next;
sf = sf->next; fn = sf->addr + DETOUR_SKEW;
fn = sf->addr + DETOUR_SKEW; if (fn != ft->ft_lastaddr) {
if (fn != ft->ft_lastaddr) { kprintf("%rFUN %6P %6H %'18T %'*ld %*s%t\n", ftrace_stackdigs, stackuse,
kprintf("%rFUN %6P %6H %'18T %'*ld %*s%t\n", ftrace_stackdigs, stackuse, GetNestingLevel(ft, sf) * 2, "", fn);
GetNestingLevel(ft, sf) * 2, "", fn); ft->ft_lastaddr = fn;
ft->ft_lastaddr = fn;
}
ft->ft_noreentry = false;
} }
} }

View file

@ -84,7 +84,8 @@ o/$(MODE)/libc/sysv/sysret.o: private \
CFLAGS += \ CFLAGS += \
-ffreestanding \ -ffreestanding \
-fno-stack-protector \ -fno-stack-protector \
-fno-sanitize=all -fno-sanitize=all \
-mgeneral-regs-only
ifeq ($(ARCH),aarch64) ifeq ($(ARCH),aarch64)
o/$(MODE)/libc/sysv/sysv.o: private \ o/$(MODE)/libc/sysv/sysv.o: private \

View file

@ -35,8 +35,10 @@ errno_t __errno;
/** /**
* Returns address of `errno` variable. * Returns address of `errno` variable.
*
* This function promises to not clobber argument registers.
*/ */
errno_t *__errno_location(void) { nocallersavedregisters errno_t *__errno_location(void) {
if (__tls_enabled) { if (__tls_enabled) {
return &__get_tls()->tib_errno; return &__get_tls()->tib_errno;
} else { } else {

View file

@ -187,7 +187,7 @@ systemfive_error:
#endif #endif
systemfive_errno: systemfive_errno:
xchg %eax,%ecx xchg %eax,%ecx
.errno call __errno_location
mov %ecx,(%rax) // normalize to c library convention mov %ecx,(%rax) // normalize to c library convention
push $-1 // negative one is only error result push $-1 // negative one is only error result
pop %rax // the push pop is to save code size pop %rax // the push pop is to save code size

View file

@ -44,7 +44,7 @@
#define STACK_SIZE 65536 #define STACK_SIZE 65536
static textwindows dontinstrument uint32_t __itimer_worker(void *arg) { textwindows dontinstrument static uint32_t __itimer_worker(void *arg) {
struct CosmoTib tls; struct CosmoTib tls;
char *sp = __builtin_frame_address(0); char *sp = __builtin_frame_address(0);
__bootstrap_tls(&tls, sp); __bootstrap_tls(&tls, sp);
@ -87,7 +87,7 @@ static textwindows dontinstrument uint32_t __itimer_worker(void *arg) {
return 0; return 0;
} }
static textwindows void __itimer_setup(void) { textwindows static void __itimer_setup(void) {
__itimer.thread = CreateThread(0, STACK_SIZE, __itimer_worker, 0, __itimer.thread = CreateThread(0, STACK_SIZE, __itimer_worker, 0,
kNtStackSizeParamIsAReservation, 0); kNtStackSizeParamIsAReservation, 0);
} }

View file

@ -151,7 +151,7 @@ void _pthread_decimate(enum PosixThreadStatus threshold) {
} }
} }
dontinstrument static int PosixThread(void *arg, int tid) { static int PosixThread(void *arg) {
struct PosixThread *pt = arg; struct PosixThread *pt = arg;
// setup scheduling // setup scheduling
@ -162,11 +162,11 @@ dontinstrument static int PosixThread(void *arg, int tid) {
// setup signal stack // setup signal stack
if (pt->pt_attr.__sigaltstacksize) { if (pt->pt_attr.__sigaltstacksize) {
struct sigaltstack ss; struct sigaltstack *ss = alloca(sizeof(struct sigaltstack));
ss.ss_sp = pt->pt_attr.__sigaltstackaddr; ss->ss_sp = pt->pt_attr.__sigaltstackaddr;
ss.ss_size = pt->pt_attr.__sigaltstacksize; ss->ss_size = pt->pt_attr.__sigaltstacksize;
ss.ss_flags = 0; ss->ss_flags = 0;
unassert(!sigaltstack(&ss, 0)); unassert(!sigaltstack(ss, 0));
} }
// set long jump handler so pthread_exit can bring control back here // set long jump handler so pthread_exit can bring control back here

View file

@ -10,7 +10,6 @@ COSMOPOLITAN_C_START_
struct CosmoFtrace { /* 16 */ struct CosmoFtrace { /* 16 */
char ft_once; /* 0 */ char ft_once; /* 0 */
char ft_noreentry; /* 1 */
int ft_skew; /* 4 */ int ft_skew; /* 4 */
int64_t ft_lastaddr; /* 8 */ int64_t ft_lastaddr; /* 8 */
}; };