Make execve() linger when it can't spoof parent

It's now possible to use execve() when the parent process isn't built by
cosmo. In such cases, the current process will kill all threads and then
linger around, waiting for the newly created process to die, and then we
propagate its exit code to the parent. This should help bazel and others

Allocating private anonymous memory is now 5x faster on Windows. This is
thanks to VirtualAlloc() which is faster than the file mapping APIs. The
fork() function also now goes 30% faster, since we are able to avoid the
VirtualProtect() calls on mappings in most cases now.

Fixes #1253
This commit is contained in:
Justine Tunney 2025-01-04 21:11:53 -08:00
parent c97a858470
commit 42a3bb729a
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
40 changed files with 612 additions and 656 deletions

View file

@ -1,242 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2021 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "dsp/core/core.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/rlimit.h"
#include "libc/calls/struct/timespec.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/directmap.h"
#include "libc/intrin/safemacros.h"
#include "libc/limits.h"
#include "libc/runtime/runtime.h"
#include "libc/stdio/rand.h"
#include "libc/stdio/stdio.h"
#include "libc/sysv/consts/auxv.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/rlimit.h"
#include "libc/sysv/consts/sig.h"
#include "libc/testlib/testlib.h"
#include "libc/time.h"
#include "libc/x/xsigaction.h"
#include "libc/x/xspawn.h"
#define MEM (64 * 1024 * 1024)
static char tmpname[PATH_MAX];
void OnSigxcpu(int sig) {
ASSERT_EQ(SIGXCPU, sig);
_Exit(0);
}
void OnSigxfsz(int sig) {
unlink(tmpname);
ASSERT_EQ(SIGXFSZ, sig);
_Exit(0);
}
TEST(setrlimit, testCpuLimit) {
int wstatus;
struct rlimit rlim;
struct timespec start;
double matrices[3][3][3];
if (IsWindows())
return; // of course it doesn't work on windows
if (IsXnu())
return; // TODO(jart): it worked before
if (IsOpenbsd())
return; // TODO(jart): fix flake
ASSERT_NE(-1, (wstatus = xspawn(0)));
if (wstatus == -2) {
ASSERT_EQ(0, xsigaction(SIGXCPU, OnSigxcpu, 0, 0, 0));
ASSERT_EQ(0, getrlimit(RLIMIT_CPU, &rlim));
rlim.rlim_cur = 1; // set soft limit to one second
ASSERT_EQ(0, setrlimit(RLIMIT_CPU, &rlim));
start = timespec_real();
do {
matmul3(matrices[0], matrices[1], matrices[2]);
matmul3(matrices[0], matrices[1], matrices[2]);
matmul3(matrices[0], matrices[1], matrices[2]);
matmul3(matrices[0], matrices[1], matrices[2]);
} while (timespec_sub(timespec_real(), start).tv_sec < 5);
_Exit(1);
}
EXPECT_TRUE(WIFEXITED(wstatus));
EXPECT_FALSE(WIFSIGNALED(wstatus));
EXPECT_EQ(0, WEXITSTATUS(wstatus));
EXPECT_EQ(0, WTERMSIG(wstatus));
}
TEST(setrlimit, testFileSizeLimit) {
char junkdata[512];
int i, fd, wstatus;
struct rlimit rlim;
if (IsWindows())
return; /* of course it doesn't work on windows */
ASSERT_NE(-1, (wstatus = xspawn(0)));
if (wstatus == -2) {
ASSERT_EQ(0, xsigaction(SIGXFSZ, OnSigxfsz, 0, 0, 0));
ASSERT_EQ(0, getrlimit(RLIMIT_FSIZE, &rlim));
rlim.rlim_cur = 1024 * 1024; /* set soft limit to one megabyte */
ASSERT_EQ(0, setrlimit(RLIMIT_FSIZE, &rlim));
snprintf(tmpname, sizeof(tmpname), "%s/%s.%d",
firstnonnull(getenv("TMPDIR"), "/tmp"),
firstnonnull(program_invocation_short_name, "unknown"), getpid());
ASSERT_NE(-1, (fd = open(tmpname, O_RDWR | O_CREAT | O_TRUNC, 0644)));
rngset(junkdata, 512, lemur64, -1);
for (i = 0; i < 5 * 1024 * 1024 / 512; ++i) {
ASSERT_EQ(512, write(fd, junkdata, 512));
}
close(fd);
unlink(tmpname);
_Exit(1);
}
EXPECT_TRUE(WIFEXITED(wstatus));
EXPECT_FALSE(WIFSIGNALED(wstatus));
EXPECT_EQ(0, WEXITSTATUS(wstatus));
EXPECT_EQ(0, WTERMSIG(wstatus));
}
int SetMemoryLimit(size_t n) {
struct rlimit rlim = {0};
getrlimit(RLIMIT_AS, &rlim);
rlim.rlim_cur = n;
rlim.rlim_max = n;
return setrlimit(RLIMIT_AS, &rlim);
}
TEST(setrlimit, testMemoryLimit) {
char *p;
bool gotsome;
int i, wstatus;
ASSERT_NE(-1, (wstatus = xspawn(0)));
if (wstatus == -2) {
ASSERT_EQ(0, SetMemoryLimit(MEM));
for (gotsome = false, i = 0; i < (MEM * 2) / getpagesize(); ++i) {
p = mmap(0, getpagesize(), PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (p != MAP_FAILED) {
gotsome = true;
} else {
ASSERT_TRUE(gotsome);
ASSERT_EQ(ENOMEM, errno);
_Exit(0);
}
rngset(p, getpagesize(), lemur64, -1);
}
_Exit(1);
}
EXPECT_TRUE(WIFEXITED(wstatus));
EXPECT_FALSE(WIFSIGNALED(wstatus));
EXPECT_EQ(0, WEXITSTATUS(wstatus));
EXPECT_EQ(0, WTERMSIG(wstatus));
}
TEST(setrlimit, testVirtualMemoryLimit) {
char *p;
int i, wstatus;
ASSERT_NE(-1, (wstatus = xspawn(0)));
if (wstatus == -2) {
ASSERT_EQ(0, setrlimit(RLIMIT_AS, &(struct rlimit){MEM, MEM}));
for (i = 0; i < (MEM * 2) / getpagesize(); ++i) {
if ((p = mmap(0, getpagesize(), PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_POPULATE, -1, 0)) ==
MAP_FAILED) {
ASSERT_EQ(ENOMEM, errno);
_Exit(0);
}
rngset(p, getpagesize(), lemur64, -1);
}
_Exit(1);
}
EXPECT_TRUE(WIFEXITED(wstatus));
EXPECT_FALSE(WIFSIGNALED(wstatus));
EXPECT_EQ(0, WEXITSTATUS(wstatus));
EXPECT_EQ(0, WTERMSIG(wstatus));
}
TEST(setrlimit, testDataMemoryLimit) {
char *p;
int i, wstatus;
if (IsXnu())
return; /* doesn't work on darwin */
if (IsNetbsd())
return; /* doesn't work on netbsd */
if (IsFreebsd())
return; /* doesn't work on freebsd */
if (IsLinux())
return; /* doesn't work on gnu/systemd */
if (IsWindows())
return; /* of course it doesn't work on windows */
ASSERT_NE(-1, (wstatus = xspawn(0)));
if (wstatus == -2) {
ASSERT_EQ(0, setrlimit(RLIMIT_DATA, &(struct rlimit){MEM, MEM}));
for (i = 0; i < (MEM * 2) / getpagesize(); ++i) {
p = sys_mmap(0, getpagesize(), PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_POPULATE, -1, 0)
.addr;
if (p == MAP_FAILED) {
ASSERT_EQ(ENOMEM, errno);
_Exit(0);
}
rngset(p, getpagesize(), lemur64, -1);
}
_Exit(1);
}
EXPECT_TRUE(WIFEXITED(wstatus));
EXPECT_FALSE(WIFSIGNALED(wstatus));
EXPECT_EQ(0, WEXITSTATUS(wstatus));
EXPECT_EQ(0, WTERMSIG(wstatus));
}
TEST(setrlimit, testPhysicalMemoryLimit) {
/* RLIMIT_RSS doesn't work on gnu/systemd */
/* RLIMIT_RSS doesn't work on darwin */
/* RLIMIT_RSS doesn't work on freebsd */
/* RLIMIT_RSS doesn't work on netbsd */
/* RLIMIT_RSS doesn't work on openbsd */
/* of course it doesn't work on windows */
}
wontreturn void OnVfork(void *ctx) {
struct rlimit *rlim;
rlim = ctx;
rlim->rlim_cur -= 1;
ASSERT_EQ(0, getrlimit(RLIMIT_CPU, rlim));
_Exit(0);
}
TEST(setrlimit, isVforkSafe) {
int ws;
struct rlimit rlim[2];
if (IsWindows())
return; /* of course it doesn't work on windows */
ASSERT_EQ(0, getrlimit(RLIMIT_CPU, rlim));
ASSERT_NE(-1, (ws = xvspawn(OnVfork, rlim, 0)));
EXPECT_TRUE(WIFEXITED(ws));
EXPECT_FALSE(WIFSIGNALED(ws));
EXPECT_EQ(0, WEXITSTATUS(ws));
EXPECT_EQ(0, WTERMSIG(ws));
ASSERT_EQ(0, getrlimit(RLIMIT_CPU, rlim + 1));
EXPECT_EQ(rlim[0].rlim_cur, rlim[1].rlim_cur);
EXPECT_EQ(rlim[0].rlim_max, rlim[1].rlim_max);
}

View file

@ -59,7 +59,7 @@ void CrashHandler(int sig, siginfo_t *si, void *ctx) {
kprintf("kprintf avoids overflowing %G si_addr=%lx sp=%lx\n", si->si_signo,
si->si_addr, ((ucontext_t *)ctx)->uc_mcontext.SP);
smashed_stack = true;
unassert(__is_stack_overflow(si, ctx));
// unassert(__is_stack_overflow(si, ctx)); // fuzzy with main thread
longjmp(recover, 123);
}

View file

@ -16,6 +16,7 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/sigaltstack.h"
#include "libc/calls/struct/siginfo.h"
@ -40,8 +41,9 @@
volatile bool smashed_stack;
void CrashHandler(int sig) {
void CrashHandler(int sig, siginfo_t *si, void *ctx) {
smashed_stack = true;
unassert(__is_stack_overflow(si, ctx));
pthread_exit((void *)123L);
}
@ -63,7 +65,7 @@ void *MyPosixThread(void *arg) {
ASSERT_SYS(0, 0, sigaltstack(&ss, 0));
sa.sa_flags = SA_SIGINFO | SA_ONSTACK; // <-- important
sigemptyset(&sa.sa_mask);
sa.sa_handler = CrashHandler;
sa.sa_sigaction = CrashHandler;
sigaction(SIGBUS, &sa, 0);
sigaction(SIGSEGV, &sa, 0);
exit(StackOverflow(1));

View file

@ -16,22 +16,28 @@
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include <cosmo.h>
#include <limits.h>
#include <pthread.h>
#include <signal.h>
#include <unistd.h>
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/siginfo.h"
#include "libc/runtime/runtime.h"
#include "libc/sysv/consts/sa.h"
#include "libc/sysv/consts/sig.h"
#include "libc/sysv/consts/ss.h"
#include "libc/thread/thread.h"
#include "libc/thread/tls.h"
/**
* stack overflow recovery technique #5
* use the cosmo posix threads extensions
* stack overflow test #5
* - make sure fork() preserves sigaltstack()
* - make sure fork() preserves guard page status
*/
sig_atomic_t smashed_stack;
jmp_buf recover;
void CrashHandler(int sig) {
smashed_stack = true;
pthread_exit(0);
void CrashHandler(int sig, siginfo_t *si, void *ctx) {
unassert(__is_stack_overflow(si, ctx));
longjmp(recover, 123);
}
int StackOverflow(int d) {
@ -44,42 +50,40 @@ int StackOverflow(int d) {
}
void *MyPosixThread(void *arg) {
exit(StackOverflow(1));
int pid;
unassert(__get_tls()->tib_sigstack_addr);
unassert((pid = fork()) != -1);
if (!pid) {
int jumpcode;
if (!(jumpcode = setjmp(recover))) {
StackOverflow(1);
_Exit(1);
}
unassert(123 == jumpcode);
} else {
int ws;
unassert(wait(&ws) != -1);
unassert(!ws);
pthread_exit(0);
}
return 0;
}
int main() {
// choose the most dangerously small size possible
size_t sigstacksize = sysconf(_SC_MINSIGSTKSZ) + 2048;
// setup signal handler
struct sigaction sa;
sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_ONSTACK;
sa.sa_handler = CrashHandler;
if (sigaction(SIGBUS, &sa, 0))
return 1;
if (sigaction(SIGSEGV, &sa, 0))
return 2;
sa.sa_sigaction = CrashHandler;
unassert(!sigaction(SIGBUS, &sa, 0));
unassert(!sigaction(SIGSEGV, &sa, 0));
// create thread with signal stack
pthread_t id;
pthread_t th;
pthread_attr_t attr;
if (pthread_attr_init(&attr))
return 3;
if (pthread_attr_setguardsize(&attr, getpagesize()))
return 4;
if (pthread_attr_setsigaltstacksize_np(&attr, sigstacksize))
return 5;
if (pthread_create(&id, &attr, MyPosixThread, 0))
return 6;
if (pthread_attr_destroy(&attr))
return 7;
if (pthread_join(id, 0))
return 8;
if (!smashed_stack)
return 9;
CheckForMemoryLeaks();
unassert(!pthread_attr_init(&attr));
unassert(!pthread_attr_setguardsize(&attr, getpagesize()));
unassert(!pthread_attr_setsigaltstacksize_np(&attr, SIGSTKSZ));
unassert(!pthread_create(&th, &attr, MyPosixThread, 0));
unassert(!pthread_attr_destroy(&attr));
unassert(!pthread_join(th, 0));
}

View file

@ -116,6 +116,42 @@ TEST(mmap, fixedTaken) {
EXPECT_SYS(0, 0, munmap(p, 1));
}
TEST(mmap, anon_rw_to_rx) {
char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, 1, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)));
ASSERT_SYS(0, 0, mprotect(p, 1, PROT_READ | PROT_EXEC));
ASSERT_SYS(0, 0, munmap(p, 1));
}
TEST(mmap, anon_rw_fork_to_rx) {
char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, 1, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)));
SPAWN(fork);
ASSERT_SYS(0, 0, mprotect(p, 1, PROT_READ | PROT_EXEC));
EXITS(0);
ASSERT_SYS(0, 0, munmap(p, 1));
}
TEST(mmap, anon_r_to_rw) {
char *p;
ASSERT_NE(MAP_FAILED,
(p = mmap(0, 1, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)));
ASSERT_SYS(0, 0, mprotect(p, 1, PROT_READ | PROT_WRITE));
ASSERT_SYS(0, 0, munmap(p, 1));
}
TEST(mmap, anon_r_fork_to_rw) {
char *p;
ASSERT_NE(MAP_FAILED,
(p = mmap(0, 1, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)));
SPAWN(fork);
ASSERT_SYS(0, 0, mprotect(p, 1, PROT_READ | PROT_WRITE));
EXITS(0);
ASSERT_SYS(0, 0, munmap(p, 1));
}
TEST(mmap, hint) {
char *p;

View file

@ -151,6 +151,32 @@ TEST(fork, preservesTlsMemory) {
EXITS(0);
}
TEST(fork, privateExtraPageData_getsCopiedByFork) {
char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, 1, PROT_WRITE | PROT_READ,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)));
p[0] = 1;
p[1] = 2;
SPAWN(fork);
ASSERT_EQ(1, p[0]);
ASSERT_EQ(2, p[1]);
EXITS(0);
ASSERT_SYS(0, 0, munmap(p, 1));
}
TEST(fork, sharedExtraPageData_getsResurrectedByFork) {
char *p;
ASSERT_NE(MAP_FAILED, (p = mmap(0, 1, PROT_WRITE | PROT_READ,
MAP_ANONYMOUS | MAP_SHARED, -1, 0)));
p[0] = 1;
p[1] = 2;
SPAWN(fork);
ASSERT_EQ(1, p[0]);
ASSERT_EQ(2, p[1]);
EXITS(0);
ASSERT_SYS(0, 0, munmap(p, 1));
}
#define CHECK_TERMSIG \
if (WIFSIGNALED(ws)) { \
kprintf("%s:%d: error: forked life subprocess terminated with %G\n", \