Make fixes and improvements

- Document more compiler flags
- Expose new __print_maps() api
- Better overflow checking in mmap()
- Improve the shell example somewhat
- Fix minor runtime bugs regarding stacks
- Make kill() on fork()+execve()'d children work
- Support CLONE_CHILD_CLEARTID for proper joining
- Fix recent possible deadlock regression with --ftrace
This commit is contained in:
Justine Tunney 2022-05-19 16:57:49 -07:00
parent 6e52cba37a
commit ec2cb88058
68 changed files with 1211 additions and 431 deletions

View file

@ -24,7 +24,6 @@
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/asan.internal.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/spinlock.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/nt/runtime.h"
@ -50,6 +49,9 @@ STATIC_YOINK("gettid"); // for kprintf()
#define LWP_DETACHED 0x00000040
#define LWP_SUSPENDED 0x00000080
__msabi extern typeof(TlsSetValue) *const __imp_TlsSetValue;
__msabi extern typeof(ExitThread) *const __imp_ExitThread;
struct CloneArgs {
union {
int tid;
@ -57,12 +59,11 @@ struct CloneArgs {
int64_t tid64;
};
int lock;
int flags;
int *ctid;
int *ztid;
char *tls;
int (*func)(void *);
void *arg;
void *pad; // TODO: Why does FreeBSD clobber this?
};
struct __tfork {
@ -76,31 +77,33 @@ static char tibdefault[64];
////////////////////////////////////////////////////////////////////////////////
// THE NEW TECHNOLOGY
uint32_t WinThreadThunk(void *warg);
asm(".section\t.text.windows,\"ax\",@progbits\n\t"
".local\tWinThreadThunk\n"
"WinThreadThunk:\n\t"
"xor\t%ebp,%ebp\n\t"
"mov\t%rcx,%rdi\n\t"
"mov\t%rcx,%rsp\n\t"
"and\t$-16,%rsp\n\t"
"push\t%rax\n\t"
"jmp\tWinThreadMain\n\t"
".size\tWinThreadThunk,.-WinThreadThunk\n\t"
".previous");
__attribute__((__used__, __no_reorder__))
int WinThreadLaunch(void *arg, int (*func)(void *), intptr_t rsp);
static textwindows wontreturn void
WinThreadMain(struct CloneArgs *wt) {
// we can't log this function because:
// 1. windows owns the backtrace pointer right now
// 2. ftrace unwinds rbp to determine depth
// we can't use address sanitizer because:
// 1. __asan_handle_no_return wipes stack
// 2. windows owns the stack memory right now
// we need win32 raw imports because:
// 1. generated thunks are function logged
noasan noinstrument static textwindows wontreturn void WinThreadEntry(
int rdi, int rsi, int rdx, struct CloneArgs *wt) {
int rc;
if (wt->flags & CLONE_SETTLS) {
TlsSetValue(__tls_index, wt->tls);
if (wt->tls) {
asm("mov\t%1,%%gs:%0"
: "=m"(*((long *)0x1480 + __tls_index))
: "r"(wt->tls));
}
if (wt->flags & CLONE_CHILD_SETTID) {
*wt->ctid = wt->tid;
}
rc = wt->func(wt->arg);
_Exit1(rc);
*wt->ctid = wt->tid;
rc = WinThreadLaunch(wt->arg, wt->func, (intptr_t)wt & -16);
// we can now clear ctid directly since we're no longer using our own
// stack memory, which can now be safely free'd by the parent thread.
*wt->ztid = 0;
// since we didn't indirect this function through NT2SYSV() it's not
// safe to simply return, and as such, we just call ExitThread().
__imp_ExitThread(rc);
unreachable;
}
static textwindows int CloneWindows(int (*func)(void *), char *stk,
@ -111,12 +114,12 @@ static textwindows int CloneWindows(int (*func)(void *), char *stk,
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
sizeof(struct CloneArgs)) &
-alignof(struct CloneArgs));
wt->flags = flags;
wt->ctid = ctid;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->func = func;
wt->arg = arg;
wt->tls = tls;
if ((h = CreateThread(0, 0, WinThreadThunk, wt, 0, &wt->utid))) {
wt->tls = flags & CLONE_SETTLS ? tls : 0;
if ((h = CreateThread(0, 0, (void *)WinThreadEntry, wt, 0, &wt->utid))) {
CloseHandle(h);
return wt->tid;
} else {
@ -128,7 +131,7 @@ static textwindows int CloneWindows(int (*func)(void *), char *stk,
// XNU'S NOT UNIX
void XnuThreadThunk(void *pthread, int machport, void *(*func)(void *),
void *arg, intptr_t *stack, unsigned flags);
void *arg, intptr_t *stack, unsigned xnuflags);
asm(".local\tXnuThreadThunk\n"
"XnuThreadThunk:\n\t"
"xor\t%ebp,%ebp\n\t"
@ -141,11 +144,11 @@ __attribute__((__used__, __no_reorder__))
static wontreturn void
XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg,
struct CloneArgs *wt, unsigned flags) {
struct CloneArgs *wt, unsigned xnuflags) {
int ax;
wt->tid = tid;
_spunlock(&wt->lock);
if (wt->flags & CLONE_SETTLS) {
if (wt->tls) {
// XNU uses the same 0x30 offset as the WIN32 TIB x64. They told the
// Go team at Google that they Apply stands by our ability to use it
// https://github.com/golang/go/issues/23617#issuecomment-376662373
@ -154,10 +157,21 @@ XnuThreadMain(void *pthread, int tid, int (*func)(void *arg), void *arg,
: "0"(__NR_thread_fast_set_cthread_self), "D"(wt->tls - 0x30)
: "rcx", "r11", "memory", "cc");
}
if (wt->flags & CLONE_CHILD_SETTID) {
*wt->ctid = tid;
}
_Exit1(func(arg));
*wt->ctid = tid;
func(arg);
// we no longer use the stack after this point
// %rax = int bsdthread_terminate(%rdi = void *stackaddr,
// %rsi = size_t freesize,
// %rdx = uint32_t port,
// %r10 = uint32_t sem);
asm volatile("movl\t$0,%0\n\t" // *wt->ztid = 0
"xor\t%%r10d,%%r10d\n\t" // sem = 0
"syscall\n\t" // _Exit1()
"ud2"
: "=m"(*wt->ztid)
: "a"(0x2000000 | 361), "D"(0), "S"(0), "d"(0)
: "rcx", "r10", "r11", "memory");
unreachable;
}
static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
@ -180,9 +194,9 @@ static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
sizeof(struct CloneArgs)) &
-alignof(struct CloneArgs));
wt->flags = flags;
wt->ctid = ctid;
wt->tls = tls;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->tls = flags & CLONE_SETTLS ? tls : 0;
_seizelock(&wt->lock); // TODO: How can we get the tid without locking?
if ((rc = bsdthread_create(fn, arg, wt, 0, PTHREAD_START_CUSTOM_XNU)) != -1) {
_spinlock(&wt->lock);
@ -194,23 +208,18 @@ static int CloneXnu(int (*fn)(void *), char *stk, size_t stksz, int flags,
////////////////////////////////////////////////////////////////////////////////
// FREE BESIYATA DISHMAYA
void FreebsdThreadThunk(void *) wontreturn;
asm(".local\tFreebsdThreadThunk\n"
"FreebsdThreadThunk:\n\t"
"xor\t%ebp,%ebp\n\t"
"mov\t%rdi,%rsp\n\t"
"and\t$-16,%rsp\n\t"
"push\t%rax\n\t"
"jmp\tFreebsdThreadMain\n\t"
".size\tFreebsdThreadThunk,.-FreebsdThreadThunk");
__attribute__((__used__, __no_reorder__))
static wontreturn void
FreebsdThreadMain(struct CloneArgs *wt) {
if (wt->flags & CLONE_CHILD_SETTID) {
*wt->ctid = wt->tid;
}
_Exit1(wt->func(wt->arg));
static wontreturn void FreebsdThreadMain(void *p) {
struct CloneArgs *wt = p;
*wt->ctid = wt->tid;
wt->func(wt->arg);
// we no longer use the stack after this point
// void thr_exit(%rdi = long *state);
asm volatile("movl\t$0,%0\n\t" // *wt->ztid = 0
"syscall" // _Exit1()
: "=m"(*wt->ztid)
: "a"(431), "D"(0)
: "rcx", "r11", "memory");
unreachable;
}
static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags,
@ -222,16 +231,16 @@ static int CloneFreebsd(int (*func)(void *), char *stk, size_t stksz, int flags,
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
sizeof(struct CloneArgs)) &
-alignof(struct CloneArgs));
wt->flags = flags;
wt->ctid = ctid;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->tls = tls;
wt->func = func;
wt->arg = arg;
struct thr_param params = {
.start_func = FreebsdThreadThunk,
.start_func = FreebsdThreadMain,
.arg = wt,
.stack_base = stk,
.stack_size = stksz,
.stack_size = (((intptr_t)wt - (intptr_t)stk) & -16) - 8,
.tls_base = flags & CLONE_SETTLS ? tls : 0,
.tls_size = flags & CLONE_SETTLS ? tlssz : 0,
.child_tid = &wt->tid64,
@ -277,7 +286,15 @@ __attribute__((__used__, __no_reorder__))
static privileged wontreturn void
OpenbsdThreadMain(struct CloneArgs *wt) {
_Exit1(wt->func(wt->arg));
wt->func(wt->arg);
// we no longer use the stack after this point
// void __threxit(%rdi = int32_t *notdead);
asm volatile("movl\t$0,%0\n\t" // *wt->ztid = 0
"syscall" // _Exit1()
: "=m"(*wt->ztid)
: "a"(302), "D"(0)
: "rcx", "r11", "memory");
unreachable;
}
static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
@ -288,8 +305,8 @@ static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
wt = (struct CloneArgs *)(((intptr_t)(stk + stksz) -
sizeof(struct CloneArgs)) &
-alignof(struct CloneArgs));
wt->flags = flags;
wt->ctid = ctid;
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
wt->func = func;
wt->arg = arg;
params.tf_stack = wt;
@ -306,12 +323,19 @@ static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
// NET BESIYATA DISHMAYA
static wontreturn void NetbsdThreadMain(void *arg, int (*func)(void *arg),
int *tid, int *ctid, int flags) {
int rc;
if (flags & CLONE_CHILD_SETTID) {
*ctid = *tid;
}
_Exit1(func(arg));
int *tid, int *ctid, int *ztid) {
int ax, dx;
*ctid = *tid;
func(arg);
// we no longer use the stack after this point
// %eax = int __lwp_exit(void);
asm volatile("movl\t$0,%2\n\t" // *wt->ztid = 0
"syscall\n\t" // _Exit1()
"ud2"
: "=a"(ax), "=d"(dx), "=m"(*ztid)
: "0"(310)
: "rcx", "r11", "memory");
unreachable;
}
static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
@ -325,8 +349,10 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
intptr_t dx, sp;
static bool once;
static int broken;
struct ucontext_netbsd *ctx;
struct ucontext_netbsd ctx;
static struct ucontext_netbsd netbsd_clone_template;
// memoize arbitrary valid processor state structure
if (!once) {
asm volatile(CFLAG_ASM("syscall")
: CFLAG_CONSTRAINT(failed), "=a"(ax)
@ -343,31 +369,34 @@ static int CloneNetbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
}
sp = (intptr_t)(stk + stksz);
sp -= sizeof(int);
sp = sp & -alignof(int);
tid = (int *)sp;
sp -= sizeof(*ctx);
sp = sp & -alignof(*ctx);
ctx = (struct ucontext_netbsd *)sp;
memcpy(ctx, &netbsd_clone_template, sizeof(*ctx));
ctx->uc_link = 0;
ctx->uc_mcontext.rbp = 0;
ctx->uc_mcontext.rsp = sp;
ctx->uc_mcontext.rip = (intptr_t)NetbsdThreadMain;
ctx->uc_mcontext.rdi = (intptr_t)arg;
ctx->uc_mcontext.rsi = (intptr_t)func;
ctx->uc_mcontext.rdx = (intptr_t)tid;
ctx->uc_mcontext.rcx = (intptr_t)ctid;
ctx->uc_mcontext.r8 = flags;
ctx->uc_flags |= _UC_STACK;
ctx->uc_stack.ss_sp = stk;
ctx->uc_stack.ss_size = stksz;
ctx->uc_stack.ss_flags = 0;
sp = sp & -16;
sp -= 8;
// pass parameters in process state
memcpy(&ctx, &netbsd_clone_template, sizeof(ctx));
ctx.uc_link = 0;
ctx.uc_mcontext.rbp = 0;
ctx.uc_mcontext.rsp = sp;
ctx.uc_mcontext.rip = (intptr_t)NetbsdThreadMain;
ctx.uc_mcontext.rdi = (intptr_t)arg;
ctx.uc_mcontext.rsi = (intptr_t)func;
ctx.uc_mcontext.rdx = (intptr_t)tid;
ctx.uc_mcontext.rcx = (intptr_t)(flags & CLONE_CHILD_SETTID ? ctid : tid);
ctx.uc_mcontext.r8 = (intptr_t)(flags & CLONE_CHILD_CLEARTID ? ctid : tid);
ctx.uc_flags |= _UC_STACK;
ctx.uc_stack.ss_sp = stk;
ctx.uc_stack.ss_size = stksz;
ctx.uc_stack.ss_flags = 0;
if (flags & CLONE_SETTLS) {
ctx->uc_flags |= _UC_TLSBASE;
ctx->uc_mcontext._mc_tlsbase = (intptr_t)tls;
ctx.uc_flags |= _UC_TLSBASE;
ctx.uc_mcontext._mc_tlsbase = (intptr_t)tls;
}
// perform the system call
asm volatile(CFLAG_ASM("syscall")
: CFLAG_CONSTRAINT(failed), "=a"(ax), "=d"(dx)
: "1"(__NR__lwp_create), "D"(ctx), "S"(LWP_DETACHED), "2"(tid)
: "1"(__NR__lwp_create), "D"(&ctx), "S"(LWP_DETACHED), "2"(tid)
: "rcx", "r11", "memory");
if (!failed) {
return *tid;
@ -388,6 +417,12 @@ int CloneLinux(int (*func)(void *), char *stk, size_t stksz, int flags,
int ax;
intptr_t *stack = (intptr_t *)(stk + stksz);
*--stack = (intptr_t)arg;
// %rax = syscall(%rax = __NR_clone,
// %rdi = flags,
// %rsi = child_stack,
// %rdx = parent_tidptr,
// %r10 = child_tidptr,
// %r8 = new_tls);
asm volatile("mov\t%4,%%r10\n\t" // ctid
"mov\t%5,%%r8\n\t" // tls
"mov\t%6,%%r9\n\t" // func
@ -398,10 +433,11 @@ int CloneLinux(int (*func)(void *), char *stk, size_t stksz, int flags,
"pop\t%%rdi\n\t" // arg
"call\t*%%r9\n\t" // func
"xchg\t%%eax,%%edi\n\t"
"jmp\t_Exit1\n1:"
"mov\t$0x3c,%%eax\n\t"
"syscall\n1:"
: "=a"(ax)
: "0"(__NR_clone_linux), "D"(flags), "S"(stack), "g"(ctid),
"g"(tls), "g"(func)
"g"(tls), "g"(func), "d"(ptid)
: "rcx", "r8", "r9", "r10", "r11", "memory");
if (ax > -4096u) errno = -ax, ax = -1;
return ax;
@ -419,8 +455,7 @@ int CloneLinux(int (*func)(void *), char *stk, size_t stksz, int flags,
* function should be synchronized using shared memory operations.
*
* Any memory that's required by this system call wrapper is allocated
* to the top of your stack. This is normally about 64 bytes, although
* on NetBSD it's currently 800.
* to the top of your stack. This shouldn't be more than 128 bytes.
*
* Your function is called from within the stack you specify. A return
* address is pushed onto your stack, that causes returning to jump to
@ -464,9 +499,25 @@ int CloneLinux(int (*func)(void *), char *stk, size_t stksz, int flags,
* and it's advised to have the bottom-most page, be a guard page
* @param flags should have:
* - `CLONE_THREAD|CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND`
* and may optionally bitwise any of the following:
* - `CLONE_CHILD_SETTID` is needed too if you use `ctid`
* - `CLONE_SETTLS` is needed too if you set `tls`
* and you may optionally bitwise or any of the following:
* - `CLONE_CHILD_SETTID` is needed too if you use `ctid` which
* is part of the memory the child owns and it'll be set right
* before the callback function is invoked
* - `CLONE_CHILD_CLEARTID` causes `*ctid = 0` upon termination
* which can be used to implement join so that the parent may
* safely free the stack memory that the child is using
* - `CLONE_PARENT_SETTID` is needed too if you use `ptid` and this
* is guaranteed to happen before clone() returns
* - `CLONE_SETTLS` is needed too if you set `tls`. You may get this
* value from the thread by calling __get_tls(). There are a few
* layout expectations imposed by your C library. Those are all
* documented by __initialize_tls() which initializes the parts of
* the first 64 bytes of tls memory that libc cares about. Also
* note that if you decide to use tls once then you must use it
* for everything, since this flag also flips a runtime state that
* enables it for the main thread and functions such as
* __errno_location() will begin assuming they can safely access
* the tls segment register.
* @param arg will be passed to your callback
* @param tls may be used to set the thread local storage segment;
* this parameter is ignored if `CLONE_SETTLS` is not set
@ -506,10 +557,12 @@ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg,
rc = einval();
} else if (IsLinux()) {
rc = CloneLinux(func, stk, stksz, flags, arg, ptid, tls, tlssz, ctid);
} else if (!IsTiny() && (flags & ~(CLONE_SETTLS | CLONE_PARENT_SETTID |
CLONE_CHILD_SETTID)) !=
(CLONE_THREAD | CLONE_VM | CLONE_FS |
CLONE_FILES | CLONE_SIGHAND)) {
} else if (!IsTiny() &&
(flags & ~(CLONE_SETTLS | CLONE_PARENT_SETTID |
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) !=
(CLONE_THREAD | CLONE_VM | CLONE_FS | CLONE_FILES |
CLONE_SIGHAND)) {
STRACE("clone flag unsupported on this platform");
rc = einval();
} else if (IsXnu()) {
rc = CloneXnu(func, stk, stksz, flags, arg, tls, tlssz, ctid);
@ -525,7 +578,12 @@ int clone(int (*func)(void *), void *stk, size_t stksz, int flags, void *arg,
rc = enosys();
}
STRACE("clone(%p, %p, %'zu, %#x, %p, %p, %p, %'zu, %p) → %d", func, stk,
if (rc != -1 && (flags & CLONE_PARENT_SETTID)) {
*ptid = rc;
}
STRACE("clone(%p, %p, %'zu, %#x, %p, %p, %p, %'zu, %p) → %d% m", func, stk,
stksz, flags, arg, ptid, tls, tlssz, ctid, rc);
return rc;
}

View file

@ -17,9 +17,11 @@
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/bits/safemacros.internal.h"
#include "libc/calls/calls.h"
#include "libc/fmt/itoa.h"
#include "libc/intrin/cmpxchg.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/lockcmpxchgp.h"
#include "libc/intrin/spinlock.h"
#include "libc/log/libfatal.internal.h"
#include "libc/macros.internal.h"
@ -49,7 +51,7 @@
void ftrace_hook(void);
_Alignas(64) char ftrace_lock;
_Alignas(64) int ftrace_lock;
static struct Ftrace {
int skew;
@ -75,6 +77,32 @@ static privileged int GetNestingLevel(struct StackFrame *frame) {
return MIN(MAX_NESTING, nesting);
}
static privileged inline void ReleaseFtraceLock(void) {
int zero = 0;
__atomic_store(&ftrace_lock, &zero, __ATOMIC_RELAXED);
}
static privileged inline bool AcquireFtraceLock(void) {
int me, owner, tries;
for (tries = 0, me = gettid();;) {
owner = 0;
if (_lockcmpxchgp(&ftrace_lock, &owner, me)) {
return true;
}
if (owner == me) {
// we ignore re-entry into ftrace. while the code and build config
// is written to make re-entry highly unlikely, it's impossible to
// guarantee. there's also the possibility of asynchronous signals
return false;
}
if (++tries & 7) {
__builtin_ia32_pause();
} else {
sched_yield();
}
}
}
/**
* Prints name of function being called.
*
@ -83,21 +111,22 @@ static privileged int GetNestingLevel(struct StackFrame *frame) {
* according to the System Five NexGen32e ABI.
*/
privileged void ftracer(void) {
long stackuse;
uint64_t stamp;
size_t stackuse;
struct StackFrame *frame;
_spinlock_cooperative(&ftrace_lock);
stamp = rdtsc();
frame = __builtin_frame_address(0);
frame = frame->next;
if (frame->addr != g_ftrace.lastaddr) {
stackuse = ROUNDUP((intptr_t)frame, GetStackSize()) - (intptr_t)frame;
kprintf("%rFUN %5P %'13T %'*lu %*s%t\r\n", g_ftrace.stackdigs, stackuse,
GetNestingLevel(frame) * 2, "", frame->addr);
g_ftrace.laststamp = X86_HAVE(RDTSCP) ? rdtscp(0) : rdtsc();
g_ftrace.lastaddr = frame->addr;
if (AcquireFtraceLock()) {
stamp = rdtsc();
frame = __builtin_frame_address(0);
frame = frame->next;
if (frame->addr != g_ftrace.lastaddr) {
stackuse = (intptr_t)GetStackAddr(0) + GetStackSize() - (intptr_t)frame;
kprintf("%rFUN %5P %'13T %'*ld %*s%t\r\n", g_ftrace.stackdigs, stackuse,
GetNestingLevel(frame) * 2, "", frame->addr);
g_ftrace.laststamp = X86_HAVE(RDTSCP) ? rdtscp(0) : rdtsc();
g_ftrace.lastaddr = frame->addr;
}
ReleaseFtraceLock();
}
_spunlock(&ftrace_lock);
}
textstartup int ftrace_install(void) {

View file

@ -31,7 +31,7 @@
#include "libc/zipos/zipos.internal.h"
static char g_lock;
static struct SymbolTable *g_symtab;
hidden struct SymbolTable *__symtab; // for kprintf
/**
* Looks for `.symtab` in zip central directory.
@ -70,6 +70,7 @@ static struct SymbolTable *GetSymbolTableFromZip(struct Zipos *zipos) {
memcpy(res, (void *)ZIP_LFILE_CONTENT(zipos->map + lf), size);
break;
#if 0
// TODO(jart): fix me
case kZipCompressionDeflate:
rc = undeflate(res, size, (void *)ZIP_LFILE_CONTENT(zipos->map + lf),
GetZipLfileCompressedSize(zipos->map + lf), &ds);
@ -121,21 +122,21 @@ static struct SymbolTable *GetSymbolTableFromElf(void) {
struct SymbolTable *GetSymbolTable(void) {
struct Zipos *z;
if (_trylock(&g_lock)) return 0;
if (!g_symtab && !__isworker) {
if (!__symtab && !__isworker) {
if (weaken(__zipos_get) && (z = weaken(__zipos_get)())) {
if ((g_symtab = GetSymbolTableFromZip(z))) {
g_symtab->names =
(uint32_t *)((char *)g_symtab + g_symtab->names_offset);
g_symtab->name_base =
(char *)((char *)g_symtab + g_symtab->name_base_offset);
if ((__symtab = GetSymbolTableFromZip(z))) {
__symtab->names =
(uint32_t *)((char *)__symtab + __symtab->names_offset);
__symtab->name_base =
(char *)((char *)__symtab + __symtab->name_base_offset);
}
}
if (!g_symtab) {
g_symtab = GetSymbolTableFromElf();
if (!__symtab) {
__symtab = GetSymbolTableFromElf();
}
}
_spunlock(&g_lock);
return g_symtab;
return __symtab;
}
/**
@ -144,11 +145,14 @@ struct SymbolTable *GetSymbolTable(void) {
* @param t if null will be auto-populated only if already open
* @return index or -1 if nothing found
*/
privileged int __get_symbol(struct SymbolTable *t, intptr_t a) {
/* asan runtime depends on this function */
noinstrument privileged int __get_symbol(struct SymbolTable *t, intptr_t a) {
// we need privileged because:
// kprintf is privileged and it depends on this
// we don't want function tracing because:
// function tracing depends on this function via kprintf
unsigned l, m, r, n, k;
if (!t && g_symtab) {
t = g_symtab;
if (!t && __symtab) {
t = __symtab;
}
if (t) {
l = 0;

View file

@ -39,7 +39,7 @@
static void *MoveMemoryIntervals(struct MemoryInterval *d,
const struct MemoryInterval *s, int n) {
/* asan runtime depends on this function */
// asan runtime depends on this function
int i;
assert(n >= 0);
if (d > s) {
@ -55,7 +55,7 @@ static void *MoveMemoryIntervals(struct MemoryInterval *d,
}
static void RemoveMemoryIntervals(struct MemoryIntervals *mm, int i, int n) {
/* asan runtime depends on this function */
// asan runtime depends on this function
assert(i >= 0);
assert(i + n <= mm->i);
MoveMemoryIntervals(mm->p + i, mm->p + i + n, mm->i - (i + n));
@ -71,7 +71,7 @@ static bool ExtendMemoryIntervals(struct MemoryIntervals *mm) {
base = (char *)kMemtrackStart;
prot = PROT_READ | PROT_WRITE;
flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED;
/* TODO(jart): These map handles should not leak across NT fork() */
// TODO(jart): These map handles should not leak across NT fork()
if (mm->p == mm->s) {
if (IsAsan()) {
shad = (char *)(((intptr_t)base >> 3) + 0x7fff8000);
@ -100,7 +100,7 @@ static bool ExtendMemoryIntervals(struct MemoryIntervals *mm) {
}
int CreateMemoryInterval(struct MemoryIntervals *mm, int i) {
/* asan runtime depends on this function */
// asan runtime depends on this function
int rc;
rc = 0;
assert(i >= 0);
@ -192,7 +192,7 @@ int ReleaseMemoryIntervals(struct MemoryIntervals *mm, int x, int y,
int TrackMemoryInterval(struct MemoryIntervals *mm, int x, int y, long h,
int prot, int flags, bool readonlyfile, bool iscow,
long offset, long size) {
/* asan runtime depends on this function */
// asan runtime depends on this function
unsigned i;
assert(y >= x);
assert(AreMemoryIntervalsOk(mm));

View file

@ -1,6 +1,7 @@
#ifndef COSMOPOLITAN_LIBC_RUNTIME_MEMTRACK_H_
#define COSMOPOLITAN_LIBC_RUNTIME_MEMTRACK_H_
#include "libc/assert.h"
#include "libc/bits/midpoint.h"
#include "libc/dce.h"
#include "libc/macros.internal.h"
#include "libc/nt/enum/version.h"
@ -168,7 +169,7 @@ forceinline unsigned FindMemoryInterval(const struct MemoryIntervals *mm,
l = 0;
r = mm->i;
while (l < r) {
m = (l + r) >> 1;
m = _midpoint(l, r);
if (mm->p[m].y < x) {
l = m + 1;
} else {

View file

@ -68,57 +68,83 @@ static wontreturn void OnUnrecoverableMmapError(const char *s) {
_Exit(199);
}
noasan static bool IsMapped(char *p, size_t n) {
return OverlapsImageSpace(p, n) || IsMemtracked(FRAME(p), FRAME(p + (n - 1)));
static noasan inline bool OverlapsExistingMapping(char *p, size_t n) {
int a, b, i;
assert(n > 0);
a = FRAME(p);
b = FRAME(p + (n - 1));
i = FindMemoryInterval(&_mmi, a);
if (i < _mmi.i) {
if (a <= _mmi.p[i].x && _mmi.p[i].x <= b) return true;
if (a <= _mmi.p[i].y && _mmi.p[i].y <= b) return true;
if (_mmi.p[i].x <= a && b <= _mmi.p[i].y) return true;
}
return false;
}
noasan static bool NeedAutomap(char *p, size_t n) {
return !p || OverlapsArenaSpace(p, n) || OverlapsShadowSpace(p, n) ||
IsMapped(p, n);
}
noasan static bool ChooseMemoryInterval(int x, int n, int *res) {
int i;
static noasan bool ChooseMemoryInterval(int x, int n, int align, int *res) {
int i, start, end;
assert(align > 0);
if (_mmi.i) {
// find the start of the automap memory region
i = FindMemoryInterval(&_mmi, x);
if (i < _mmi.i) {
if (x + n < _mmi.p[i].x) {
*res = x;
return true;
// check to see if there's space available before the first entry
if (!__builtin_add_overflow(x, align - 1, &start)) {
start &= -align;
if (!__builtin_add_overflow(start, n - 1, &end)) {
if (end < _mmi.p[i].x) {
*res = start;
return true;
}
}
}
// check to see if there's space available between two entries
while (++i < _mmi.i) {
if (_mmi.p[i].x - _mmi.p[i - 1].y > n) {
*res = _mmi.p[i - 1].y + 1;
return true;
if (!__builtin_add_overflow(_mmi.p[i - 1].y, 1, &start) &&
!__builtin_add_overflow(start, align - 1, &start)) {
start &= -align;
if (!__builtin_add_overflow(start, n - 1, &end)) {
if (end < _mmi.p[i].x) {
*res = start;
return true;
}
}
}
}
}
if (INT_MAX - _mmi.p[i - 1].y >= n) {
*res = _mmi.p[i - 1].y + 1;
return true;
// otherwise append after the last entry if space is available
if (!__builtin_add_overflow(_mmi.p[i - 1].y, 1, &start) &&
!__builtin_add_overflow(start, align - 1, &start)) {
start &= -align;
if (!__builtin_add_overflow(start, n - 1, &end)) {
*res = start;
return true;
}
}
return false;
} else {
*res = x;
return true;
// if memtrack is empty, then just assign the requested address
// assuming it doesn't overflow
if (!__builtin_add_overflow(x, align - 1, &start)) {
start &= -align;
if (!__builtin_add_overflow(start, n - 1, &end)) {
*res = start;
return true;
}
}
}
return false;
}
noasan static bool Automap(int n, int *res) {
*res = -1;
if (ChooseMemoryInterval(FRAME(kAutomapStart), n, res)) {
assert(*res >= FRAME(kAutomapStart));
if (*res + n <= FRAME(kAutomapStart + (kAutomapStart - 1))) {
return true;
} else {
STRACE("mmap(%.12p, %p) ENOMEM (automap interval exhausted)", ADDR(*res),
ADDR(n + 1));
return false;
}
} else {
STRACE("mmap(%.12p, %p) ENOMEM (automap failed)", ADDR(*res), ADDR(n + 1));
return false;
}
noasan static bool Automap(int count, int align, int *res) {
return ChooseMemoryInterval(FRAME(kAutomapStart), count, align, res) &&
*res + count <= FRAME(kAutomapStart + (kAutomapSize - 1));
}
noasan static size_t GetMemtrackSize(struct MemoryIntervals *mm) {
@ -221,21 +247,16 @@ static noasan inline void *Mmap(void *addr, size_t size, int prot, int flags,
}
#endif
char *p = addr;
bool needguard;
struct DirectMap dm;
size_t virtualused, virtualneed;
int a, b, i, f, m, n, x;
bool needguard, clashes;
size_t virtualused, virtualneed;
if (UNLIKELY(!size)) {
STRACE("size=0");
return VIP(einval());
}
if (UNLIKELY(!IsLegalSize(size))) {
STRACE("size isn't 48-bit");
return VIP(einval());
}
if (UNLIKELY(!IsLegalPointer(p))) {
STRACE("p isn't 48-bit");
return VIP(einval());
@ -266,29 +287,28 @@ static noasan inline void *Mmap(void *addr, size_t size, int prot, int flags,
return VIP(einval());
}
if (UNLIKELY(INT64_MAX - size < off)) {
STRACE("too large");
return VIP(einval());
}
if (UNLIKELY(!ALIGNED(off))) {
STRACE("p isn't 64kb aligned");
return VIP(einval());
}
if ((flags & MAP_FIXED_NOREPLACE) && IsMapped(p, size)) {
#ifdef SYSDEBUG
if (OverlapsImageSpace(p, size)) {
STRACE("overlaps image");
} else {
STRACE("overlaps existing");
if (fd == -1) {
size = ROUNDUP(size, FRAMESIZE);
if (IsWindows()) {
prot |= PROT_WRITE; /* kludge */
}
#endif
return VIP(efault());
} else if (__isfdkind(fd, kFdZip)) {
STRACE("fd is zipos handle");
return VIP(einval());
}
if (__isfdkind(fd, kFdZip)) {
STRACE("fd is zipos handle");
if (UNLIKELY(!IsLegalSize(size))) {
STRACE("size isn't 48-bit");
return VIP(einval());
}
if (UNLIKELY(INT64_MAX - size < off)) {
STRACE("too large");
return VIP(einval());
}
@ -301,15 +321,29 @@ static noasan inline void *Mmap(void *addr, size_t size, int prot, int flags,
return VIP(enomem());
}
if (fd == -1) {
size = ROUNDUP(size, FRAMESIZE);
if (IsWindows()) {
prot |= PROT_WRITE; /* kludge */
}
clashes = OverlapsImageSpace(p, size) || OverlapsExistingMapping(p, size);
if ((flags & MAP_FIXED_NOREPLACE) && clashes) {
STRACE("noreplace overlaps existing");
return VIP(eexist());
}
if (__builtin_add_overflow((int)(size >> 16), (int)!!(size & (FRAMESIZE - 1)),
&n)) {
STRACE("memory range overflows");
return VIP(einval());
}
// if size is a two power then automap will use it as alignment
if (IS2POW(size)) {
a = size >> 16;
if (!a) {
a = 1;
}
} else {
a = 1;
}
n = (int)(size >> 16) + !!(size & (FRAMESIZE - 1));
assert(n > 0);
f = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
if (flags & MAP_FIXED) {
x = FRAME(p);
@ -318,10 +352,11 @@ static noasan inline void *Mmap(void *addr, size_t size, int prot, int flags,
OnUnrecoverableMmapError("FIXED UNTRACK FAILED");
}
}
} else if (!NeedAutomap(p, size)) {
} else if (p && !clashes && !OverlapsArenaSpace(p, size) &&
!OverlapsShadowSpace(p, size)) {
x = FRAME(p);
} else if (!Automap(n, &x)) {
STRACE("AUTOMAP OUT OF MEMORY D:");
} else if (!Automap(n, a, &x)) {
STRACE("automap has no room for %d frames with %d alignment", n, a);
return VIP(enomem());
}

View file

@ -337,7 +337,7 @@ textstartup void __printargs(const char *prologue) {
PRINT("");
PRINT("MEMTRACK");
PrintMemoryIntervals(2, &_mmi);
__print_maps();
PRINT("");
PRINT("TERMIOS");

30
libc/runtime/printmaps.c Normal file
View file

@ -0,0 +1,30 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/intrin/spinlock.h"
#include "libc/runtime/memtrack.internal.h"
#include "libc/runtime/runtime.h"
/**
* Prints memory mappings to stderr.
*/
void __print_maps(void) {
_spinlock(&_mmi.lock);
PrintMemoryIntervals(2, &_mmi);
_spunlock(&_mmi.lock);
}

View file

@ -112,6 +112,7 @@ void __morph_begin(void);
void __morph_end(void);
unsigned char *GetFirstInstruction(void);
unsigned char *GetInstructionLengths(void);
void __print_maps(void);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -64,6 +64,7 @@ $(LIBC_RUNTIME_A).pkg: \
# this is the function tracing runtime
o/$(MODE)/libc/runtime/ftracer.o: \
OVERRIDE_CFLAGS += \
-x-no-pg \
-mno-fentry \
-ffreestanding \
-fno-sanitize=all
@ -86,8 +87,7 @@ o/$(MODE)/libc/runtime/print.greg.o \
o/$(MODE)/libc/runtime/stackchkfail.o \
o/$(MODE)/libc/runtime/stackchkfaillocal.o \
o/$(MODE)/libc/runtime/winmain.greg.o \
o/$(MODE)/libc/runtime/opensymboltable.o \
o/$(MODE)/libc/runtime/getsymboltable.greg.o: \
o/$(MODE)/libc/runtime/opensymboltable.o: \
OVERRIDE_CFLAGS += \
-Os \
-ffreestanding \

View file

@ -12,6 +12,7 @@
* This defaults to `STACKSIZE`. The bottom-most page will be protected
* to ensure your stack does not magically grow beyond this value. It's
* possible to detect stack overflows, by calling `ShowCrashReports()`.
* Your stack size must be a power of two; the linker will check this.
*
* If you want to know how much stack your programs needs, then
*
@ -28,11 +29,17 @@
/**
* Tunes APE stack virtual address.
*
* This defaults to `0x7e0000000000 - STACKSIZE`. The value defined by
* this macro will be respected, with two exceptions: (1) in MODE=tiny
* the operating system provided stack is used instead and (2) Windows
* Seven doesn't support 64-bit addresses so 0x10000000 - GetStackSize
* is used instead.
* This value must be aligned according to your stack size, and that's
* checked by your linker script. This defaults to `0x700000000000` so
*
* 1. It's easy to see how close you are to the bottom
* 2. The linker script error is unlikely to happen
*
* This macro will be respected, with two exceptions
*
* 1. In MODE=tiny the operating system provided stack is used instead
* 2. Windows 7 doesn't support 64-bit addresses, so we'll instead use
* `0x10000000 - GetStackSize()` as the stack address
*
* @see libc/sysv/systemfive.S
* @see libc/nt/winmain.greg.c
@ -56,10 +63,20 @@ extern char ape_stack_prot[] __attribute__((__weak__));
extern char ape_stack_memsz[] __attribute__((__weak__));
extern char ape_stack_align[] __attribute__((__weak__));
/**
* Returns size of stack, which is always a two power.
*/
#define GetStackSize() ((uintptr_t)ape_stack_memsz)
/**
* Returns address of bottom of stack.
*
* This takes into consideration threads and sigaltstack. This is
* implemented as a fast pure expression, since we're able to make the
* assumption that stack sizes are two powers and aligned. This is
* thanks to (1) the linker script checks the statically chosen sizes,
* and (2) the mmap() address picker will choose aligned addresses when
* the provided size is a two power.
*/
#define GetStackAddr(ADDEND) \
((void *)((((intptr_t)__builtin_frame_address(0) - 1) & -GetStackSize()) + \
@ -67,6 +84,12 @@ extern char ape_stack_align[] __attribute__((__weak__));
/**
* Returns preferred bottom address of stack.
*
* This is the stakc address of the main process. The only time that
* isn't guaranteed to be the case is in MODE=tiny, since it doesn't
* link the code for stack creation at startup. This generally isn't
* problematic, since MODE=tiny doesn't use any of the runtime codes
* which want the stack to be cheaply knowable, e.g. ftrace, kprintf
*/
#define GetStaticStackAddr(ADDEND) \
({ \

View file

@ -0,0 +1,46 @@
/*-*- mode:unix-assembly; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/macros.internal.h"
.text.windows
// Used by clone() on Windows to launch thread.
//
// Windows owns the stack memory when we initially enter threads.
// This function switches us over, so that we can start using the
// runtime facilities.
//
// @param %rdi is arg
// @param %rsi is func
// @param %rdx is stack
// @return %rax is exit code
// @see clone()
WinThreadLaunch:
push %rbx
push %r15
mov %rbp,%r15
mov %rsp,%rbx
mov %rdx,%rsp
xor %rbp,%rbp
call *%rsi
mov %r15,%rbp
mov %rbx,%rsp
pop %r15
pop %rbx
ret
.endfn WinThreadLaunch,globl,hidden