mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-02-07 23:13:34 +00:00
This change introduces the nointernet() function which may be called to prevent a process and its descendants from communicating with publicly routable Internet addresses. GNU Make has been modified to always call this function. In the future Landlock Make will have a way to whitelist subnets to override this behavior, or disable it entirely. Support is available for Linux only. Our firewall does not require root access. Calling nointernet() will return control to the caller inside a new process that has a SECCOMP BPF filter installed, which traps network-related system calls. Your original process then becomes a permanent ptrace() supervisor that monitors all processes and threads descending from the returned child. Whenever a networking system call happens, the kernel will stop the process and wake up the monitor, which then peeks into the child's memory to read the sockaddr_in to determine if it's ok. The downside to doing this is that there can be only one supervisor at a time using ptrace() on a process. So this firewall won't be enabled if you run make under strace or inside gdb. It also makes testing tricky.
348 lines
12 KiB
C
348 lines
12 KiB
C
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
|
│ │
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
│ │
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
#include "libc/assert.h"
|
|
#include "libc/calls/calls.h"
|
|
#include "libc/calls/struct/bpf.h"
|
|
#include "libc/calls/struct/filter.h"
|
|
#include "libc/calls/struct/seccomp.h"
|
|
#include "libc/calls/struct/sigaction.h"
|
|
#include "libc/calls/struct/sigset.h"
|
|
#include "libc/calls/syscall_support-sysv.internal.h"
|
|
#include "libc/dce.h"
|
|
#include "libc/errno.h"
|
|
#include "libc/intrin/describeflags.internal.h"
|
|
#include "libc/intrin/kprintf.h"
|
|
#include "libc/intrin/likely.h"
|
|
#include "libc/macros.internal.h"
|
|
#include "libc/runtime/runtime.h"
|
|
#include "libc/sock/sock.h"
|
|
#include "libc/sock/struct/msghdr.h"
|
|
#include "libc/sock/struct/sockaddr.h"
|
|
#include "libc/str/str.h"
|
|
#include "libc/sysv/consts/af.h"
|
|
#include "libc/sysv/consts/audit.h"
|
|
#include "libc/sysv/consts/nr.h"
|
|
#include "libc/sysv/consts/nrlinux.h"
|
|
#include "libc/sysv/consts/pr.h"
|
|
#include "libc/sysv/consts/ptrace.h"
|
|
#include "libc/sysv/consts/sig.h"
|
|
#include "libc/sysv/errfuns.h"
|
|
#include "net/http/ip.h"
|
|
|
|
// byte offsets of saved registers, as handed to PTRACE_PEEKUSER and
// PTRACE_POKEUSER below. each register occupies one 8-byte slot; the
// slot order appears to follow linux's x86-64 user_regs_struct
// (TODO confirm before porting to another architecture)
#define R9       (8 * 8)
#define R8       (9 * 8)
#define RAX      (10 * 8)
#define RDX      (12 * 8)
#define RSI      (13 * 8)
#define RDI      (14 * 8)
#define ORIG_RAX (15 * 8)

// waitpid() option used by the monitor so it hears about state changes
// of any child process or thread
#define __WALL 0x40000000
|
|
|
|
// byte offset of a member of struct seccomp_data, for use as the
// operand of absolute bpf loads
#define OFF(field) offsetof(struct seccomp_data, field)
|
|
|
|
// diagnostic logging for the monitor. flip the condition below to 1 to
// route messages through kprintf(); otherwise every DEBUG() statement
// expands to `donothing` and its arguments are never evaluated
#if 0
#define DEBUG(...) kprintf(__VA_ARGS__)
#else
#define DEBUG(...) donothing
#endif
|
|
|
|
// evaluates x exactly once; if the result is -1, logs the failing
// expression (when DEBUG is enabled) and then executes `hlt` to crash
// the calling process on the spot. any other result falls through
#define ORDIE(x)                                            \
  do {                                                      \
    if (!UNLIKELY((x) == -1)) break;                        \
    DEBUG("%s:%d: %s failed %m\n", __FILE__, __LINE__, #x); \
    asm("hlt");                                             \
    unreachable;                                            \
  } while (0)
|
|
|
|
static const struct sock_filter kInetBpf[] = {
|
|
// cargo culted architecture assertion
|
|
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(arch)),
|
|
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
|
|
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
|
|
// block system calls from the future
|
|
BPF_STMT(BPF_LD + BPF_W + BPF_ABS, OFF(nr)),
|
|
BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, __NR_linux_memfd_secret, 0, 1),
|
|
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | 38), // ENOSYS
|
|
// only allow local and internet sockets
|
|
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_socket, 0, 5),
|
|
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),
|
|
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x001, 2, 0), // AF_UNIX
|
|
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x002, 1, 0), // AF_INET
|
|
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | 1), // EPERM
|
|
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
|
|
// support for these not implemented yet
|
|
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x133, 0, 1), // sendmmsg
|
|
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ERRNO | 1), // EPERM
|
|
// trace syscalls with struct sockaddr
|
|
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x02e, 3, 0), // sendmsg
|
|
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x02c, 2, 0), // sendto
|
|
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x031, 1, 0), // bind
|
|
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, 0x02a, 0, 1), // connect
|
|
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_TRACE),
|
|
// default course of action
|
|
BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW),
|
|
};
|
|
|
|
static int PeekData(int pid, long addr, void *buf, size_t size) {
|
|
long i, j, w;
|
|
for (i = 0; i < size; i += sizeof(long)) {
|
|
if (sys_ptrace(PTRACE_PEEKTEXT, pid, addr + i, &w) != -1) {
|
|
for (j = 0; i + j < size && j < sizeof(long); ++j) {
|
|
((char *)buf)[i + j] = w;
|
|
w >>= 8;
|
|
}
|
|
} else {
|
|
return -1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// writes a one-line description of wait status `ws` for process `pid`
// through DEBUG(), wrapping the pid in ansi color codes when it is the
// main child. nothing is printed while DEBUG() is compiled out
static void LogProcessEvent(int main, int pid, int ws) {
  DEBUG("trace: %s%06d%s 0x%06x",          //
        pid != main ? "" : "\e[31;1m",     //
        pid,                               //
        pid != main ? "" : "\e[0m",        //
        ws);
  if (WIFEXITED(ws)) {
    DEBUG(" exit %d", WEXITSTATUS(ws));
  }
  if (WIFSIGNALED(ws)) {
    DEBUG(" sig %d", WTERMSIG(ws));
  }
  if (WIFSTOPPED(ws)) {
    // bits 16..23 of the status hold the ptrace event code
    DEBUG(" stop %s %s", strsignal(WSTOPSIG(ws)),
          DescribePtraceEvent((ws >> 16) & 0xff));
  }
  if (WIFCONTINUED(ws)) {
    DEBUG(" cont");
  }
  if (WCOREDUMP(ws)) {
    DEBUG(" core");
  }
  DEBUG("\n");
}
|
|
|
|
static int Raise(int sig) {
|
|
sigset_t mask;
|
|
sigaction(sig, &(struct sigaction){0}, 0);
|
|
sigfillset(&mask);
|
|
sigprocmask(SIG_SETMASK, &mask, 0);
|
|
kill(getpid(), sig);
|
|
sigdelset(&mask, sig);
|
|
sigprocmask(SIG_SETMASK, &mask, 0);
|
|
_Exit(128 + sig);
|
|
}
|
|
|
|
// decides whether the sandboxed process may use socket address `addr`.
//
// AF_UNIX addresses are always allowed. AF_INET addresses are allowed
// only when IsPublicIp() says the ip isn't public; a refused ip is
// reported with a warning. every other family is refused.
//
// @param addr is a copy of the sockaddr taken from the traced process
// @return true if the system call may proceed
static bool IsSockaddrAllowed(const struct sockaddr_storage *addr) {
  uint32_t ip;
  if (addr->ss_family == AF_UNIX) {
    return true;
  }
  if (addr->ss_family == AF_INET) {
    ip = ntohl(((const struct sockaddr_in *)addr)->sin_addr.s_addr);
    if (!IsPublicIp(ip)) {
      return true;
    } else {
      // octets are printed unsigned: with %hhd any octet >= 128 would
      // be shown as a negative number (e.g. 142 as -114)
      kprintf("warning: attempted to communicate with public ip "
              "%hhu.%hhu.%hhu.%hhu\n",
              ip >> 24, ip >> 16, ip >> 8, ip);
      return false;
    }
  }
  DEBUG("bad family %d\n", addr->ss_family);
  return false;
}
|
|
|
|
static void OnSockaddrSyscall(int pid, int r1, int r2) {
|
|
long si, dx;
|
|
uint32_t addrlen;
|
|
struct sockaddr_storage addr = {0};
|
|
ORDIE(sys_ptrace(PTRACE_PEEKUSER, pid, r1, &si));
|
|
ORDIE(sys_ptrace(PTRACE_PEEKUSER, pid, r2, &dx));
|
|
addrlen = dx;
|
|
if (!si) {
|
|
// if address isn't supplied, it's probably safe. for example,
|
|
// send() is implemented in cosmo using sendto() with 0/0 addr
|
|
return;
|
|
}
|
|
if (PeekData(pid, si, &addr, MIN(addrlen, sizeof(addr))) == -1) {
|
|
DEBUG("failed to peek addr\n"); // probably an efault
|
|
goto Deny;
|
|
}
|
|
if (IsSockaddrAllowed(&addr)) {
|
|
return;
|
|
} else {
|
|
goto Deny;
|
|
}
|
|
Deny:
|
|
ORDIE(sys_ptrace(PTRACE_POKEUSER, pid, ORIG_RAX, -1));
|
|
}
|
|
|
|
static void OnSendmsg(int pid) {
|
|
long si;
|
|
struct msghdr msg = {0};
|
|
struct sockaddr_storage addr = {0};
|
|
ORDIE(sys_ptrace(PTRACE_PEEKUSER, pid, RSI, &si));
|
|
if (PeekData(pid, si, &msg, sizeof(msg)) == -1) {
|
|
DEBUG("failed to peek msg\n"); // probably an efault
|
|
goto Deny;
|
|
}
|
|
if (!msg.msg_name) {
|
|
// if address isn't supplied, it's probably fine.
|
|
return;
|
|
}
|
|
if (PeekData(pid, (long)msg.msg_name, &addr,
|
|
MIN(msg.msg_namelen, sizeof(addr))) == -1) {
|
|
DEBUG("failed to peek msg name\n"); // probably an efault
|
|
goto Deny;
|
|
}
|
|
if (IsSockaddrAllowed(&addr)) {
|
|
return;
|
|
} else {
|
|
goto Deny;
|
|
}
|
|
Deny:
|
|
ORDIE(sys_ptrace(PTRACE_POKEUSER, pid, ORIG_RAX, -1));
|
|
}
|
|
|
|
static void HandleSeccompTrace(int pid) {
|
|
long ax;
|
|
ORDIE(sys_ptrace(PTRACE_PEEKUSER, pid, ORIG_RAX, &ax));
|
|
switch (ax) {
|
|
case 0x031: // bind
|
|
case 0x02a: // connect
|
|
OnSockaddrSyscall(pid, RSI, RDX);
|
|
break;
|
|
case 0x02c: // sendto
|
|
OnSockaddrSyscall(pid, R8, R9);
|
|
break;
|
|
case 0x02e: // sendmsg
|
|
OnSendmsg(pid);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static int WaitForTrace(int main) {
|
|
int ws, pid;
|
|
for (;;) {
|
|
// waits for state change on any child process or thread
|
|
// eintr isn't possible since we're blocking all signals
|
|
ORDIE(pid = waitpid(-1, &ws, __WALL));
|
|
LogProcessEvent(main, pid, ws);
|
|
// once main child exits or dies, we exit / die the same way. we're
|
|
// not currently tracking pids, so it's important that a child does
|
|
// not exit before its children. otherwise the grandchildren get in
|
|
// a permanently stopped state. to address that, we'll send sigterm
|
|
// to the process group which we defined earlier.
|
|
if (WIFEXITED(ws)) {
|
|
if (pid == main) {
|
|
kill(-getpid(), SIGTERM);
|
|
_Exit(WEXITSTATUS(ws));
|
|
}
|
|
} else if (WIFSIGNALED(ws)) {
|
|
if (pid == main) {
|
|
kill(-getpid(), SIGTERM);
|
|
Raise(WTERMSIG(ws));
|
|
}
|
|
} else if (WIFSTOPPED(ws)) {
|
|
if ((ws >> 8) == (SIGTRAP | (PTRACE_EVENT_SECCOMP << 8))) {
|
|
return pid;
|
|
} else if ((ws >> 8) == (SIGTRAP | (PTRACE_EVENT_EXEC << 8))) {
|
|
ORDIE(ptrace(PTRACE_CONT, pid, 0, 0));
|
|
} else if ((ws >> 8) == (SIGTRAP | (PTRACE_EVENT_FORK << 8)) ||
|
|
(ws >> 8) == (SIGTRAP | (PTRACE_EVENT_VFORK << 8)) ||
|
|
(ws >> 8) == (SIGTRAP | (PTRACE_EVENT_CLONE << 8))) {
|
|
ORDIE(ptrace(PTRACE_CONT, pid, 0, 0));
|
|
} else {
|
|
ORDIE(ptrace(PTRACE_CONT, pid, 0, WSTOPSIG(ws)));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Disables internet access.
|
|
*/
|
|
int nointernet(void) {
|
|
int ws, act, main;
|
|
sigset_t set, old;
|
|
char path[PATH_MAX];
|
|
struct sock_fprog prog = {.filter = kInetBpf, .len = ARRAYLEN(kInetBpf)};
|
|
|
|
// seccomp bpf and ptrace are pretty much just linux for now.
|
|
if (!IsLinux() || !__is_linux_2_6_23()) {
|
|
return enosys();
|
|
}
|
|
|
|
// ensure we're at the root of a process group, so we're able to
|
|
// broadcast a termination signal later on that catches dangling
|
|
// subprocesss our child forgot to destroy. without calling this
|
|
// subprocesses could end up permanently stopped if monitor dies
|
|
setpgrp();
|
|
|
|
// prevent crash handlers from intercepting sigsegv
|
|
ORDIE(sigfillset(&set));
|
|
ORDIE(sigprocmask(SIG_SETMASK, &set, &old));
|
|
|
|
// create traced child that'll replace this program
|
|
if ((main = fork()) == -1) {
|
|
ORDIE(sigprocmask(SIG_SETMASK, &old, 0));
|
|
return -1;
|
|
}
|
|
if (!main) {
|
|
if (sys_ptrace(PTRACE_TRACEME, 0, 0, 0) == -1) {
|
|
// there can be only one
|
|
// throw sigsegv on eperm
|
|
// we're already being traced
|
|
asm("hlt");
|
|
}
|
|
prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
|
ORDIE(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog));
|
|
ORDIE(kill(getpid(), SIGSTOP));
|
|
ORDIE(sigprocmask(SIG_SETMASK, &old, 0));
|
|
// return to caller from child
|
|
return 0;
|
|
}
|
|
|
|
// wait for child to stop itself
|
|
ORDIE(waitpid(main, &ws, 0));
|
|
if (WIFSIGNALED(ws)) {
|
|
// child couldn't enable ptrace or seccomp
|
|
sigprocmask(SIG_SETMASK, &old, 0);
|
|
return eperm();
|
|
}
|
|
assert(WIFSTOPPED(ws));
|
|
|
|
// parent process becomes monitor of subprocess tree. all signals
|
|
// continue to be blocked since we assume they'll also be sent to
|
|
// children, which will die, and then the monitor dies afterwards
|
|
ORDIE(sys_ptrace(PTRACE_SETOPTIONS, main, 0,
|
|
PTRACE_O_TRACESECCOMP | PTRACE_O_TRACEFORK |
|
|
PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE |
|
|
PTRACE_O_TRACEEXEC));
|
|
for (act = main;;) {
|
|
ORDIE(sys_ptrace(PTRACE_CONT, act, 0, 0));
|
|
act = WaitForTrace(main);
|
|
HandleSeccompTrace(act);
|
|
}
|
|
}
|