Support Linux binfmt_misc and APE loading on Apple

The "no modify self" variant of Actually Portable Executable is now
supported on all platforms. If you use `$(APE_NO_MODIFY_SELF)` then
ld.bfd will embed a 4096 byte ELF binary and a 4096 byte Macho file
which are installed on the fly to ${TMPDIR:-/tmp}, which enables us
launch the executable, without needing to copy the whole executable
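
For example, a standalone build against the amalgamated release might
look roughly like this (a sketch; it assumes the release ships an
ape-no-modify-self.o bootstrap object alongside ape.o, and hello.c
stands in for your program):

    # link with the no-modify-self bootstrap instead of ape.o
    gcc -g -Os -static -nostdlib -nostdinc -fno-pie -no-pie \
      -mno-red-zone -fno-omit-frame-pointer \
      -o hello.com.dbg hello.c -fuse-ld=bfd -Wl,-T,ape.lds \
      -include cosmopolitan.h crt.o ape-no-modify-self.o cosmopolitan.a
    objcopy -S -O binary hello.com.dbg hello.com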

To prevent it from copying a tiny executable to your temp directory,
you need to install the `ape` command (renamed from ape-loader) to a
system path. For example:

    # FreeBSD / NetBSD / OpenBSD
    make -j8 o//ape/ape
    cp o//ape/ape /usr/bin/ape

    # Mac OS
    # make -j8 o//ape/ape.macho
    curl https://justine.lol/ape.macho >/usr/bin/ape
    chmod +x /usr/bin/ape
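
Once installed, you can sanity-check the loader by invoking it
explicitly on any APE binary (hello.com below is just a placeholder
name):

    # run an APE binary through the loader explicitly
    ape ./hello.com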

On Linux you can get even better performance with the new binfmt_misc
support, which makes launching non-modifying APE binaries as fast as
launching ELF executables. Running the following command:

    # Linux
    ape/apeinstall.sh

Will copy the APE loader to /usr/bin/ape and register it with
binfmt_misc.
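
If you'd rather do it by hand, the registration boils down to
something like this (a sketch of what the script does; the MZqFpD
magic is assumed to match the leading bytes of an APE binary):

    # copy the loader and register it with binfmt_misc manually
    sudo cp o//ape/ape /usr/bin/ape
    sudo sh -c "echo ':APE:M::MZqFpD::/usr/bin/ape:' >/proc/sys/fs/binfmt_misc/register"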

Lastly, this change also fixes a really interesting race condition
with OpenBSD thread joining.

Justine Tunney 2022-05-21 07:52:58 -07:00
parent 7838edae88
commit db0d8dd806
31 changed files with 1089 additions and 305 deletions

@@ -64,7 +64,7 @@ noasan texthead uint64_t __new_page(struct mman *mm) {
* Returns pointer to page table entry for page at virtual address.
* Additional page tables are allocated if needed as a side-effect.
*/
-noasan texthead uint64_t *__get_virtual(struct mman *mm, uint64_t *t,
+noasan textreal uint64_t *__get_virtual(struct mman *mm, uint64_t *t,
int64_t vaddr, bool maketables) {
uint64_t *e, p;
unsigned char h;

libc/calls/nanos.c (new file, 34 lines)

@@ -0,0 +1,34 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/calls.h"
#include "libc/calls/struct/timespec.h"
#include "libc/time/time.h"
/**
* Returns nanoseconds since UNIX epoch.
*/
int128_t _nanos(int timer) {
int128_t nanos;
struct timespec ts;
clock_gettime(timer, &ts);
nanos = ts.tv_sec;
nanos *= 1000000000;
nanos += ts.tv_nsec;
return nanos;
}

libc/calls/nanos.h (new file, 10 lines)

@@ -0,0 +1,10 @@
#ifndef COSMOPOLITAN_LIBC_CALLS_NANOS_H_
#define COSMOPOLITAN_LIBC_CALLS_NANOS_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
int128_t _nanos(int);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_CALLS_NANOS_H_ */

@@ -21,6 +21,7 @@
#include "libc/calls/sigbits.h"
#include "libc/calls/strace.internal.h"
#include "libc/intrin/cmpxchg.h"
#include "libc/intrin/lockcmpxchg.h"
#include "libc/intrin/spinlock.h"
#include "libc/log/libfatal.internal.h"
#include "libc/macros.internal.h"
@@ -131,9 +132,7 @@ static privileged bool __sig_deliver(bool restartable, int sig, int si_code,
// since sigaction() is @asyncsignalsafe we only restore it if the
// user didn't change it during the signal handler. we also don't
// need to do anything if this was a oneshot signal or nodefer.
-_spinlock(&__sig_lock);
-_cmpxchg(__sighandrvas + sig, (int32_t)(intptr_t)SIG_DFL, rva);
-_spunlock(&__sig_lock);
+_lockcmpxchg(__sighandrvas + sig, (int32_t)(intptr_t)SIG_DFL, rva);
}
if (!restartable) {

@@ -26,6 +26,8 @@
//
// @param rsp is [n,argv₀..argvₙ₋₁,0,envp₀..,0,auxv₀..,0,..]
// @note FreeBSD is special (see freebsd/lib/csu/amd64/...)
+// @note NetBSD will only zero the call-clobbered registers
+// @note ape.S and ape-loader both set RCX to XNU on Darwin
// @noreturn
_start:
@@ -34,12 +36,16 @@ _start:
test %rdi,%rdi
cmovnz %rdi,%rsp
jz 0f
-movb $FREEBSD,__hostos(%rip)
+movb $FREEBSD,%cl
0:
#endif
+// set operating system when already detected
+mov %cl,__hostos(%rip)
// get startup timestamp as early as possible
// its used by --strace flag and kprintf() %T
-0: rdtsc
+rdtsc
ezlea kStartTsc,bx
mov %eax,(%rbx)
mov %edx,4(%rbx)
@@ -85,14 +91,3 @@ _start:
call cosmo
9: .unreachable
.endfn _start,weak,hidden
-#if SupportsXnu()
-// Macintosh userspace program entrypoint.
-//
-// @param rsp is [n,argv₀..argvₙ₋₁,0,envp₀..,0,auxv₀..,0,..]
-// @note FreeBSD is special (see freebsd/lib/csu/amd64/...)
-// @noreturn
-_xnu: movb $XNU,__hostos(%rip)
-jmp 0b
-.endfn _xnu,weak,hidden
-#endif

@@ -4,7 +4,7 @@
COSMOPOLITAN_C_START_
char *sleb64(char *, int64_t);
-char *zleb64(char *, int64_t);
+char *zleb64(char[hasatleast 10], int64_t);
char *uleb64(char[hasatleast 10], uint64_t);
int unzleb64(const char *, size_t, int64_t *);
int unuleb64(char *, size_t, uint64_t *);

@@ -44,7 +44,7 @@
* @return p + i
* @see unzleb64()
*/
-char *zleb64(char *p, int64_t x) {
+char *zleb64(char p[hasatleast 10], int64_t x) {
int c;
uint64_t u;
u = x;

@@ -58,7 +58,10 @@ struct CloneArgs {
uint32_t utid;
int64_t tid64;
};
-int lock;
+union {
+int lock;
+void *pstack;
+};
int *ctid;
int *ztid;
char *tls;
@@ -287,12 +290,18 @@ __attribute__((__used__, __no_reorder__))
static privileged wontreturn void
OpenbsdThreadMain(struct CloneArgs *wt) {
wt->func(wt->arg);
-// we no longer use the stack after this point
+// we no longer use the stack after this point. however openbsd
+// validates the rsp register too so a race condition can still
+// happen if the parent tries to free the stack. we'll solve it
+// by simply changing rsp back to the old value before exiting!
+// although ideally there should be a better solution.
+//
// void __threxit(%rdi = int32_t *notdead);
asm volatile("movl\t$0,%0\n\t" // *wt->ztid = 0
asm volatile("mov\t%3,%%rsp\n\t"
"movl\t$0,%0\n\t" // *wt->ztid = 0
"syscall" // _Exit1()
: "=m"(*wt->ztid)
: "a"(302), "D"(0)
: "a"(302), "D"(0), "r"(wt->pstack)
: "rcx", "r11", "memory");
unreachable;
}
@@ -307,6 +316,7 @@ static int CloneOpenbsd(int (*func)(void *), char *stk, size_t stksz, int flags,
-alignof(struct CloneArgs));
wt->ctid = flags & CLONE_CHILD_SETTID ? ctid : &wt->tid;
wt->ztid = flags & CLONE_CHILD_CLEARTID ? ctid : &wt->tid;
+wt->pstack = __builtin_frame_address(0);
wt->func = func;
wt->arg = arg;
params.tf_stack = wt;

@@ -47,7 +47,7 @@ static struct Ftrace {
int64_t lastaddr;
} g_ftrace;
-static privileged int GetNestingLevelImpl(struct StackFrame *frame) {
+static privileged inline int GetNestingLevelImpl(struct StackFrame *frame) {
int nesting = -2;
while (frame) {
++nesting;
@@ -56,7 +56,7 @@ static privileged int GetNestingLevelImpl(struct StackFrame *frame) {
return MAX(0, nesting);
}
-static privileged int GetNestingLevel(struct StackFrame *frame) {
+static privileged inline int GetNestingLevel(struct StackFrame *frame) {
int nesting;
nesting = GetNestingLevelImpl(frame);
if (nesting < g_ftrace.skew) g_ftrace.skew = nesting;

@@ -9,9 +9,9 @@
#define __BENCH_ARRAY(S) \
_Section(".piro.relo.sort.bench.2." #S ",\"aw\",@init_array #")
-#define __TEST_PROTOTYPE(S, N, A, K) \
-void S##_##N(void); \
-const void *const S##_##N##_ptr[] A(S##_##N) = {S##_##N}; \
+#define __TEST_PROTOTYPE(S, N, A, K) \
+void S##_##N(void); \
+testfn_t S##_##N##_ptr[] A(S##_##N) = {S##_##N}; \
testonly K void S##_##N(void)
#define __TEST_SECTION(NAME, CONTENT) \