mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-01-31 19:43:32 +00:00
d5910e2673
- Fixed bug where stdio eof wasn't being sticky - Fixed bug where fseeko() wasn't clearing eof state - Removed assert() usage from libc favoring _unassert() / _npassert()
339 lines
12 KiB
C
339 lines
12 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
│ │
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/assert.h"
|
|
#include "libc/calls/calls.h"
|
|
#include "libc/calls/struct/bpf.h"
|
|
#include "libc/calls/struct/filter.h"
|
|
#include "libc/calls/struct/seccomp.h"
|
|
#include "libc/calls/struct/sigaction.h"
|
|
#include "libc/calls/struct/sigset.h"
|
|
#include "libc/calls/syscall_support-sysv.internal.h"
|
|
#include "libc/dce.h"
|
|
#include "libc/errno.h"
|
|
#include "libc/intrin/describeflags.internal.h"
|
|
#include "libc/intrin/kprintf.h"
|
|
#include "libc/intrin/likely.h"
|
|
#include "libc/macros.internal.h"
|
|
#include "libc/runtime/runtime.h"
|
|
#include "libc/sock/sock.h"
|
|
#include "libc/sock/struct/msghdr.h"
|
|
#include "libc/sock/struct/sockaddr.h"
|
|
#include "libc/str/str.h"
|
|
#include "libc/sysv/consts/af.h"
|
|
#include "libc/sysv/consts/audit.h"
|
|
#include "libc/sysv/consts/nr.h"
|
|
#include "libc/sysv/consts/nrlinux.h"
|
|
#include "libc/sysv/consts/pr.h"
|
|
#include "libc/sysv/consts/ptrace.h"
|
|
#include "libc/sysv/consts/sig.h"
|
|
#include "libc/sysv/errfuns.h"
|
|
#include "net/http/ip.h"
|
|
|
|
// byte offsets of saved registers, used below as the address argument
// of PTRACE_PEEKUSER / PTRACE_POKEUSER. each slot is eight bytes wide
// and the values match linux's x86-64 `struct user_regs_struct` layout
#define ORIG_RAX (15 * 8)  // system call number the tracee asked for
#define RAX      (10 * 8)
#define RDI      (14 * 8)  // 1st syscall argument
#define RSI      (13 * 8)  // 2nd syscall argument
#define RDX      (12 * 8)  // 3rd syscall argument
#define R8       (9 * 8)   // 5th syscall argument
#define R9       (8 * 8)   // 6th syscall argument

// waitpid() option passed by WaitForTrace() so that state changes of
// any child process or thread get reported
#define __WALL 0x40000000
|
|
|
|
// byte offset of member `f` inside struct seccomp_data, which is what
// the BPF_ABS loads in kInetBpf[] are addressed by
#define OFF(f) offsetof(struct seccomp_data, f)

// flip the `#if 0` below to `#if 1` to have the monitor narrate what
// it observes through kprintf(); as shipped, DEBUG() expands to
// `donothing` and its arguments are never evaluated
#if 0
#define DEBUG(...) kprintf(__VA_ARGS__)
#else
#define DEBUG(...) donothing
#endif

// evaluates `x` once; if it yields -1 the failing expression is
// reported through DEBUG() and control reaches `notpossible`
#define ORDIE(x)                                              \
  do {                                                        \
    if (UNLIKELY((x) == -1)) {                                \
      DEBUG("%s:%d: %s failed %m\n", __FILE__, __LINE__, #x); \
      notpossible;                                            \
    }                                                         \
  } while (0)
|
|
|
|
static const struct sock_filter kInetBpf[] = {
|
|
// cargo culted architecture assertion
|
|
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(arch)),
|
|
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
|
|
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
|
|
// block system calls from the future
|
|
BPF_STMT(BPF_LD + BPF_W + BPF_ABS, OFF(nr)),
|
|
BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, __NR_linux_memfd_secret, 0, 1),
|
|
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | 38), // ENOSYS
|
|
// only allow local and internet sockets
|
|
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_socket, 0, 5),
|
|
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),
|
|
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x001, 2, 0), // AF_UNIX
|
|
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x002, 1, 0), // AF_INET
|
|
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | 1), // EPERM
|
|
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
|
|
// support for these not implemented yet
|
|
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x133, 0, 1), // sendmmsg
|
|
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ERRNO | 1), // EPERM
|
|
// trace syscalls with struct sockaddr
|
|
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x02e, 3, 0), // sendmsg
|
|
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x02c, 2, 0), // sendto
|
|
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x031, 1, 0), // bind
|
|
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x02a, 0, 1), // connect
|
|
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_TRACE),
|
|
// default course of action
|
|
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW),
|
|
};
|
|
|
|
static int PeekData(int pid, long addr, void *buf, size_t size) {
|
|
long i, j, w;
|
|
for (i = 0; i < size; i += sizeof(long)) {
|
|
if (sys_ptrace(PTRACE_PEEKTEXT, pid, addr + i, &w) != -1) {
|
|
for (j = 0; i + j < size && j < sizeof(long); ++j) {
|
|
((char *)buf)[i + j] = w;
|
|
w >>= 8;
|
|
}
|
|
} else {
|
|
return -1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
 * Emits one "trace:" line through DEBUG() describing wait status `ws`
 * as reported for `pid`.
 *
 * The pid is wrapped in the "\e[31;1m" / "\e[0m" escape sequences when
 * it equals `main`. Nothing is printed unless DEBUG() is enabled.
 */
static void LogProcessEvent(int main, int pid, int ws) {
  DEBUG("trace: %s%06d%s 0x%06x",       //
        pid == main ? "\e[31;1m" : "",  //
        pid,                            //
        pid == main ? "\e[0m" : "",     //
        ws);
  if (WIFEXITED(ws)) DEBUG(" exit %d", WEXITSTATUS(ws));
  if (WIFSIGNALED(ws)) DEBUG(" sig %d", WTERMSIG(ws));
  if (WIFSTOPPED(ws)) {
    // bits 16..23 of the status carry the ptrace event code
    DEBUG(" stop %s %s", strsignal(WSTOPSIG(ws)),
          DescribePtraceEvent((ws & 0xff0000) >> 16));
  }
  if (WIFCONTINUED(ws)) DEBUG(" cont");
  if (WCOREDUMP(ws)) DEBUG(" core");
  DEBUG("\n");
}
|
|
|
|
static int Raise(int sig) {
|
|
sigset_t mask;
|
|
sigaction(sig, &(struct sigaction){0}, 0);
|
|
sigfillset(&mask);
|
|
sigprocmask(SIG_SETMASK, &mask, 0);
|
|
kill(getpid(), sig);
|
|
sigdelset(&mask, sig);
|
|
sigprocmask(SIG_SETMASK, &mask, 0);
|
|
_Exit(128 + sig);
|
|
}
|
|
|
|
static bool IsSockaddrAllowed(struct sockaddr_storage *addr) {
|
|
uint32_t ip;
|
|
if (addr->ss_family == AF_UNIX) {
|
|
return true;
|
|
}
|
|
if (addr->ss_family == AF_INET) {
|
|
ip = ntohl(((struct sockaddr_in *)addr)->sin_addr.s_addr);
|
|
if (IsPrivateIp(ip) || IsLoopbackIp(ip)) {
|
|
return true;
|
|
} else {
|
|
kprintf("warning: attempted to communicate with public ip "
|
|
"%hhd.%hhd.%hhd.%hhd\n",
|
|
ip >> 24, ip >> 16, ip >> 8, ip);
|
|
return false;
|
|
}
|
|
}
|
|
DEBUG("bad family %d\n", addr->ss_family);
|
|
return false;
|
|
}
|
|
|
|
static void OnSockaddrSyscall(int pid, int r1, int r2) {
|
|
long si, dx;
|
|
uint32_t addrlen;
|
|
struct sockaddr_storage addr = {0};
|
|
ORDIE(sys_ptrace(PTRACE_PEEKUSER, pid, r1, &si));
|
|
ORDIE(sys_ptrace(PTRACE_PEEKUSER, pid, r2, &dx));
|
|
addrlen = dx;
|
|
if (!si) {
|
|
// if address isn't supplied, it's probably safe. for example,
|
|
// send() is implemented in cosmo using sendto() with 0/0 addr
|
|
return;
|
|
}
|
|
if (PeekData(pid, si, &addr, MIN(addrlen, sizeof(addr))) == -1) {
|
|
DEBUG("failed to peek addr\n"); // probably an efault
|
|
goto Deny;
|
|
}
|
|
if (IsSockaddrAllowed(&addr)) {
|
|
return;
|
|
} else {
|
|
goto Deny;
|
|
}
|
|
Deny:
|
|
ORDIE(sys_ptrace(PTRACE_POKEUSER, pid, ORIG_RAX, -1));
|
|
}
|
|
|
|
static void OnSendmsg(int pid) {
|
|
long si;
|
|
struct msghdr msg = {0};
|
|
struct sockaddr_storage addr = {0};
|
|
ORDIE(sys_ptrace(PTRACE_PEEKUSER, pid, RSI, &si));
|
|
if (PeekData(pid, si, &msg, sizeof(msg)) == -1) {
|
|
DEBUG("failed to peek msg\n"); // probably an efault
|
|
goto Deny;
|
|
}
|
|
if (!msg.msg_name) {
|
|
// if address isn't supplied, it's probably fine.
|
|
return;
|
|
}
|
|
if (PeekData(pid, (long)msg.msg_name, &addr,
|
|
MIN(msg.msg_namelen, sizeof(addr))) == -1) {
|
|
DEBUG("failed to peek msg name\n"); // probably an efault
|
|
goto Deny;
|
|
}
|
|
if (IsSockaddrAllowed(&addr)) {
|
|
return;
|
|
} else {
|
|
goto Deny;
|
|
}
|
|
Deny:
|
|
ORDIE(sys_ptrace(PTRACE_POKEUSER, pid, ORIG_RAX, -1));
|
|
}
|
|
|
|
static void HandleSeccompTrace(int pid) {
|
|
long ax;
|
|
ORDIE(sys_ptrace(PTRACE_PEEKUSER, pid, ORIG_RAX, &ax));
|
|
switch (ax) {
|
|
case 0x031: // bind
|
|
case 0x02a: // connect
|
|
OnSockaddrSyscall(pid, RSI, RDX);
|
|
break;
|
|
case 0x02c: // sendto
|
|
OnSockaddrSyscall(pid, R8, R9);
|
|
break;
|
|
case 0x02e: // sendmsg
|
|
OnSendmsg(pid);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static int WaitForTrace(int main) {
|
|
int ws, pid;
|
|
for (;;) {
|
|
// waits for state change on any child process or thread
|
|
// eintr isn't possible since we're blocking all signals
|
|
ORDIE(pid = waitpid(-1, &ws, __WALL));
|
|
LogProcessEvent(main, pid, ws);
|
|
if (WIFEXITED(ws)) {
|
|
if (pid == main) {
|
|
_Exit(WEXITSTATUS(ws));
|
|
}
|
|
} else if (WIFSIGNALED(ws)) {
|
|
if (pid == main) {
|
|
Raise(WTERMSIG(ws));
|
|
}
|
|
} else if (WIFSTOPPED(ws)) {
|
|
if ((ws >> 8) == (SIGTRAP | (PTRACE_EVENT_SECCOMP << 8))) {
|
|
return pid;
|
|
} else if ((ws >> 8) == (SIGTRAP | (PTRACE_EVENT_EXEC << 8))) {
|
|
ORDIE(ptrace(PTRACE_CONT, pid, 0, 0));
|
|
} else if ((ws >> 8) == (SIGTRAP | (PTRACE_EVENT_FORK << 8)) ||
|
|
(ws >> 8) == (SIGTRAP | (PTRACE_EVENT_VFORK << 8)) ||
|
|
(ws >> 8) == (SIGTRAP | (PTRACE_EVENT_CLONE << 8))) {
|
|
ORDIE(ptrace(PTRACE_CONT, pid, 0, 0));
|
|
} else {
|
|
ORDIE(ptrace(PTRACE_CONT, pid, 0, WSTOPSIG(ws)));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Disables internet access.
|
|
*
|
|
* Warning: This function uses ptrace to react to seccomp filter events.
|
|
* This approach is effective, but it's not bulletproof, since a highly
|
|
* motivated attacker could theoretically use threads to modify sockaddr
|
|
* in the short time between it being monitored and the actual syscall.
|
|
*/
|
|
int nointernet(void) {
|
|
int ws, act, main;
|
|
sigset_t set, old;
|
|
char path[PATH_MAX];
|
|
struct sock_fprog prog = {.filter = kInetBpf, .len = ARRAYLEN(kInetBpf)};
|
|
|
|
// seccomp bpf and ptrace are pretty much just linux for now.
|
|
if (!IsLinux() || !__is_linux_2_6_23()) {
|
|
return enosys();
|
|
}
|
|
|
|
// prevent crash handlers from intercepting sigsegv
|
|
ORDIE(sigfillset(&set));
|
|
ORDIE(sigprocmask(SIG_SETMASK, &set, &old));
|
|
|
|
// create traced child that'll replace this program
|
|
if ((main = fork()) == -1) {
|
|
ORDIE(sigprocmask(SIG_SETMASK, &old, 0));
|
|
return -1;
|
|
}
|
|
if (!main) {
|
|
if (sys_ptrace(PTRACE_TRACEME, 0, 0, 0) == -1) {
|
|
// there can be only one
|
|
// throw sigsegv on eperm
|
|
// we're already being traced
|
|
asm("hlt");
|
|
}
|
|
prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
|
ORDIE(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog));
|
|
ORDIE(kill(getpid(), SIGSTOP));
|
|
ORDIE(sigprocmask(SIG_SETMASK, &old, 0));
|
|
// return to caller from child
|
|
return 0;
|
|
}
|
|
|
|
// wait for child to stop itself
|
|
ORDIE(waitpid(main, &ws, 0));
|
|
if (WIFSIGNALED(ws)) {
|
|
// child couldn't enable ptrace or seccomp
|
|
sigprocmask(SIG_SETMASK, &old, 0);
|
|
return eperm();
|
|
}
|
|
_npassert(WIFSTOPPED(ws));
|
|
|
|
// parent process becomes monitor of subprocess tree. all signals
|
|
// continue to be blocked since we assume they'll also be sent to
|
|
// children, which will die, and then the monitor dies afterwards
|
|
ORDIE(sys_ptrace(PTRACE_SETOPTIONS, main, 0,
|
|
PTRACE_O_TRACESECCOMP | PTRACE_O_TRACEFORK |
|
|
PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE |
|
|
PTRACE_O_TRACEEXEC));
|
|
for (act = main;;) {
|
|
ORDIE(sys_ptrace(PTRACE_CONT, act, 0, 0));
|
|
act = WaitForTrace(main);
|
|
HandleSeccompTrace(act);
|
|
}
|
|
}
|