cosmopolitan/libc/sock/nointernet.c

340 lines
12 KiB
C
Raw Normal View History

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/struct/bpf.h"
#include "libc/calls/struct/filter.h"
#include "libc/calls/struct/seccomp.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/struct/sigset.h"
#include "libc/calls/syscall_support-sysv.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/likely.h"
#include "libc/macros.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/sock/sock.h"
#include "libc/sock/struct/msghdr.h"
#include "libc/sock/struct/sockaddr.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/af.h"
#include "libc/sysv/consts/audit.h"
#include "libc/sysv/consts/nr.h"
#include "libc/sysv/consts/nrlinux.h"
#include "libc/sysv/consts/pr.h"
#include "libc/sysv/consts/ptrace.h"
#include "libc/sysv/consts/sig.h"
#include "libc/sysv/errfuns.h"
#include "net/http/ip.h"
// byte offsets handed to PTRACE_PEEKUSER / PTRACE_POKEUSER in this file
// to address one saved register of a stopped tracee
// NOTE(review): values look like the Linux x86-64 user_regs_struct
// field offsets (8 bytes per slot) -- confirm against <sys/user.h>
#define ORIG_RAX 120
#define RAX 80
#define RDI 112
#define RSI 104
#define RDX 96
#define R8 72
#define R9 64
// extra option bit passed to waitpid() by WaitForTrace()
// NOTE(review): presumably mirrors Linux's __WALL flag -- confirm
#define __WALL 0x40000000
// byte offset of member `f` inside struct seccomp_data, used as the
// operand of the BPF_ABS loads in kInetBpf
#define OFF(f) offsetof(struct seccomp_data, f)
// trace logging is compiled out; change `#if 0` to `#if 1` to route
// DEBUG() through kprintf()
#if 0
#define DEBUG(...) kprintf(__VA_ARGS__)
#else
#define DEBUG(...) donothing
#endif
// evaluates `x` exactly once; when the result equals -1 it logs the
// file, line and source text of `x` through DEBUG() and then invokes
// `notpossible`
#define ORDIE(x) \
do { \
if (UNLIKELY((x) == -1)) { \
DEBUG("%s:%d: %s failed %m\n", __FILE__, __LINE__, #x); \
notpossible; \
} \
} while (0)
// seccomp bpf program installed by nointernet(). instruction indices
// are noted on the right because the conditional jumps are relative.
static const struct sock_filter kInetBpf[] = {
    // refuse to run under any architecture other than x86-64
    BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(arch)),                        //  0
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),         //  1
    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),                  //  2
    // syscall numbers at or above memfd_secret are answered with ENOSYS
    BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),                          //  3
    BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, __NR_linux_memfd_secret, 0, 1),   //  4
    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | 38),  // ENOSYS         //  5
    // socket(): the family argument must be AF_UNIX or AF_INET;
    // any other syscall skips ahead to instruction 12
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_socket, 0, 5),         //  6
    BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),                     //  7
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x001, 2, 0),  // AF_UNIX         //  8
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x002, 1, 0),  // AF_INET         //  9
    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | 1),  // EPERM           // 10
    // put the syscall number back in the accumulator
    BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),                          // 11
    // not monitored yet, so it's simply refused
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x133, 0, 1),  // sendmmsg        // 12
    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | 1),  // EPERM           // 13
    // syscalls carrying a struct sockaddr are handed to the tracer
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x02e, 3, 0),  // sendmsg         // 14
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x02c, 2, 0),  // sendto          // 15
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x031, 1, 0),  // bind            // 16
    BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x02a, 0, 1),  // connect         // 17
    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRACE),                         // 18
    // everything else goes through untouched
    BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),                         // 19
};
/**
 * Copies `size` bytes at address `addr` of traced process `pid` into
 * `buf`.
 *
 * Memory is fetched one machine word at a time with PTRACE_PEEKTEXT.
 * Every fetch is issued at a word-aligned address. An aligned word
 * can't straddle a page boundary, so a fetch never touches a page that
 * doesn't also hold at least one requested byte. Fetching at unaligned
 * addresses could read past the end of the requested range into an
 * unmapped page and fail even though all requested bytes are readable.
 *
 * Bytes are unpacked least-significant first, i.e. this assumes a
 * little-endian tracee (the seccomp filter in this file only admits
 * x86-64).
 *
 * @param pid is the stopped tracee
 * @param addr is the address in the tracee's address space
 * @param buf receives the bytes
 * @param size is the number of bytes to copy, which may be zero
 * @return 0 on success, or -1 if any word couldn't be peeked
 */
static int PeekData(int pid, long addr, void *buf, size_t size) {
  long w;
  size_t got, k;
  unsigned long base;
  unsigned char *out = buf;
  // start at the aligned word containing the first requested byte
  base = (unsigned long)addr & ~(unsigned long)(sizeof(long) - 1);
  // index within that word of the first byte we actually want
  k = (unsigned long)addr - base;
  for (got = 0; got < size; base += sizeof(long), k = 0) {
    if (sys_ptrace(PTRACE_PEEKTEXT, pid, (long)base, &w) == -1) {
      return -1;
    }
    for (; k < sizeof(long) && got < size; ++k) {
      out[got++] = (unsigned long)w >> (k * 8);
    }
  }
  return 0;
}
/**
 * Emits one DEBUG() line describing wait status `ws` reported for
 * process `pid`.
 *
 * The pid is wrapped in bold red escape codes when it equals `main`.
 * Produces no output unless DEBUG() is enabled.
 */
static void LogProcessEvent(int main, int pid, int ws) {
  DEBUG("trace: %s%06d%s 0x%06x",        //
        pid != main ? "" : "\e[31;1m",   //
        pid,                             //
        pid != main ? "" : "\e[0m",      //
        ws);
  // each predicate is tested independently, so one status can
  // contribute several annotations to the line
  if (WIFEXITED(ws)) {
    DEBUG(" exit %d", WEXITSTATUS(ws));
  }
  if (WIFSIGNALED(ws)) {
    DEBUG(" sig %d", WTERMSIG(ws));
  }
  if (WIFSTOPPED(ws)) {
    // bits 16..23 of the status hold the ptrace event code
    DEBUG(" stop %s %s", strsignal(WSTOPSIG(ws)),
          DescribePtraceEvent((ws >> 16) & 0xff));
  }
  if (WIFCONTINUED(ws)) {
    DEBUG(" cont");
  }
  if (WCOREDUMP(ws)) {
    DEBUG(" core");
  }
  DEBUG("\n");
}
static int Raise(int sig) {
sigset_t mask;
sigaction(sig, &(struct sigaction){0}, 0);
sigfillset(&mask);
sigprocmask(SIG_SETMASK, &mask, 0);
kill(getpid(), sig);
sigdelset(&mask, sig);
sigprocmask(SIG_SETMASK, &mask, 0);
_Exit(128 + sig);
}
/**
 * Decides whether a tracee may use socket address `addr`.
 *
 * AF_UNIX addresses are always accepted. AF_INET addresses are accepted
 * only when IsPrivateIp() or IsLoopbackIp() approves of them; otherwise
 * a warning naming the address is printed. Every other address family
 * is rejected.
 *
 * @param addr is a copy of the tracee's sockaddr
 * @return true if the address is permitted
 */
static bool IsSockaddrAllowed(struct sockaddr_storage *addr) {
  uint32_t ip;
  if (addr->ss_family == AF_UNIX) {
    return true;
  }
  if (addr->ss_family == AF_INET) {
    ip = ntohl(((struct sockaddr_in *)addr)->sin_addr.s_addr);
    if (IsPrivateIp(ip) || IsLoopbackIp(ip)) {
      return true;
    }
    // each octet is masked to 0..255 and printed as a plain int, so
    // octets of 128 and above can't be rendered as negative numbers
    kprintf("warning: attempted to communicate with public ip "
            "%d.%d.%d.%d\n",
            (int)((ip >> 24) & 255), (int)((ip >> 16) & 255),
            (int)((ip >> 8) & 255), (int)(ip & 255));
    return false;
  }
  DEBUG("bad family %d\n", addr->ss_family);
  return false;
}
static void OnSockaddrSyscall(int pid, int r1, int r2) {
long si, dx;
uint32_t addrlen;
struct sockaddr_storage addr = {0};
ORDIE(sys_ptrace(PTRACE_PEEKUSER, pid, r1, &si));
ORDIE(sys_ptrace(PTRACE_PEEKUSER, pid, r2, &dx));
addrlen = dx;
if (!si) {
// if address isn't supplied, it's probably safe. for example,
// send() is implemented in cosmo using sendto() with 0/0 addr
return;
}
if (PeekData(pid, si, &addr, MIN(addrlen, sizeof(addr))) == -1) {
DEBUG("failed to peek addr\n"); // probably an efault
goto Deny;
}
if (IsSockaddrAllowed(&addr)) {
return;
} else {
goto Deny;
}
Deny:
ORDIE(sys_ptrace(PTRACE_POKEUSER, pid, ORIG_RAX, -1));
}
static void OnSendmsg(int pid) {
long si;
struct msghdr msg = {0};
struct sockaddr_storage addr = {0};
ORDIE(sys_ptrace(PTRACE_PEEKUSER, pid, RSI, &si));
if (PeekData(pid, si, &msg, sizeof(msg)) == -1) {
DEBUG("failed to peek msg\n"); // probably an efault
goto Deny;
}
if (!msg.msg_name) {
// if address isn't supplied, it's probably fine.
return;
}
if (PeekData(pid, (long)msg.msg_name, &addr,
MIN(msg.msg_namelen, sizeof(addr))) == -1) {
DEBUG("failed to peek msg name\n"); // probably an efault
goto Deny;
}
if (IsSockaddrAllowed(&addr)) {
return;
} else {
goto Deny;
}
Deny:
ORDIE(sys_ptrace(PTRACE_POKEUSER, pid, ORIG_RAX, -1));
}
/**
 * Dispatches a seccomp trace stop of tracee `pid` to the checker that
 * matches the syscall number found in its ORIG_RAX.
 *
 * Syscall numbers other than the four below are ignored.
 */
static void HandleSeccompTrace(int pid) {
  long ax;
  ORDIE(sys_ptrace(PTRACE_PEEKUSER, pid, ORIG_RAX, &ax));
  if (ax == 0x031 || ax == 0x02a) {  // bind, connect
    // address pointer in rsi, length in rdx
    OnSockaddrSyscall(pid, RSI, RDX);
  } else if (ax == 0x02c) {  // sendto
    // address pointer in r8, length in r9
    OnSockaddrSyscall(pid, R8, R9);
  } else if (ax == 0x02e) {  // sendmsg
    OnSendmsg(pid);
  }
}
/**
 * Services wait events until some tracee hits a seccomp trace stop.
 *
 * Along the way: if `main` exits, this process exits with the same
 * status; if `main` is killed by a signal, Raise() is called with that
 * signal; exec/fork/vfork/clone event stops are resumed without a
 * signal; any other stop is resumed with its stop signal passed along.
 *
 * @param main is the pid of the top-level traced child
 * @return pid of the tracee that is stopped at a seccomp event
 */
static int WaitForTrace(int main) {
  int ws, pid, event;
  for (;;) {
    // waits for state change on any child process or thread
    // eintr isn't possible since we're blocking all signals
    ORDIE(pid = waitpid(-1, &ws, __WALL));
    LogProcessEvent(main, pid, ws);
    if (WIFEXITED(ws)) {
      if (pid == main) {
        _Exit(WEXITSTATUS(ws));
      }
      continue;
    }
    if (WIFSIGNALED(ws)) {
      if (pid == main) {
        Raise(WTERMSIG(ws));
      }
      continue;
    }
    if (!WIFSTOPPED(ws)) {
      continue;
    }
    // stop signal in the low byte, ptrace event code in the next one
    event = ws >> 8;
    if (event == (SIGTRAP | (PTRACE_EVENT_SECCOMP << 8))) {
      return pid;
    }
    if (event == (SIGTRAP | (PTRACE_EVENT_EXEC << 8)) ||
        event == (SIGTRAP | (PTRACE_EVENT_FORK << 8)) ||
        event == (SIGTRAP | (PTRACE_EVENT_VFORK << 8)) ||
        event == (SIGTRAP | (PTRACE_EVENT_CLONE << 8))) {
      ORDIE(ptrace(PTRACE_CONT, pid, 0, 0));
    } else {
      ORDIE(ptrace(PTRACE_CONT, pid, 0, WSTOPSIG(ws)));
    }
  }
}
/**
 * Disables internet access.
 *
 * The calling process forks. The child installs a seccomp filter, asks
 * to be traced, and then returns to the caller so the program carries
 * on inside it. The original process never returns on success: it
 * stays behind as the monitor that inspects the socket addresses of
 * trapped system calls, and it exits when the child exits.
 *
 * Warning: This function uses ptrace to react to seccomp filter events.
 * This approach is effective, but it's not bulletproof, since a highly
 * motivated attacker could theoretically use threads to modify sockaddr
 * in the short time between it being monitored and the actual syscall.
 *
 * @return 0 in the child once sandboxing is in place; otherwise the
 *     result of enosys() if the system isn't a supported Linux, -1 if
 *     fork() failed, or the result of eperm() if the child was killed
 *     by a signal before it could stop itself
 */
int nointernet(void) {
  int ws, act, main;
  sigset_t set, old;
  struct sock_fprog prog = {.filter = kInetBpf, .len = ARRAYLEN(kInetBpf)};
  // seccomp bpf and ptrace are pretty much just linux for now.
  if (!IsLinux() || !__is_linux_2_6_23()) {
    return enosys();
  }
  // prevent crash handlers from intercepting sigsegv
  ORDIE(sigfillset(&set));
  ORDIE(sigprocmask(SIG_SETMASK, &set, &old));
  // create traced child that'll replace this program
  if ((main = fork()) == -1) {
    ORDIE(sigprocmask(SIG_SETMASK, &old, 0));
    return -1;
  }
  if (!main) {
    if (sys_ptrace(PTRACE_TRACEME, 0, 0, 0) == -1) {
      // there can be only one
      // throw sigsegv on eperm
      // we're already being traced
      asm("hlt");
    }
    // result deliberately unchecked; the seccomp call that follows is
    // the one whose failure is treated as fatal
    prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
    ORDIE(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog));
    // stop ourselves so the parent can set its ptrace options
    ORDIE(kill(getpid(), SIGSTOP));
    ORDIE(sigprocmask(SIG_SETMASK, &old, 0));
    // return to caller from child
    return 0;
  }
  // wait for child to stop itself
  ORDIE(waitpid(main, &ws, 0));
  if (WIFSIGNALED(ws)) {
    // child couldn't enable ptrace or seccomp
    sigprocmask(SIG_SETMASK, &old, 0);
    return eperm();
  }
  _npassert(WIFSTOPPED(ws));
  // parent process becomes monitor of subprocess tree. all signals
  // continue to be blocked since we assume they'll also be sent to
  // children, which will die, and then the monitor dies afterwards
  ORDIE(sys_ptrace(PTRACE_SETOPTIONS, main, 0,
                   PTRACE_O_TRACESECCOMP | PTRACE_O_TRACEFORK |
                       PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE |
                       PTRACE_O_TRACEEXEC));
  // resume whichever tracee stopped last, then block until the next
  // seccomp event and inspect it. WaitForTrace() ends this process
  // once the main child is gone, so the loop has no exit of its own
  for (act = main;;) {
    ORDIE(sys_ptrace(PTRACE_CONT, act, 0, 0));
    act = WaitForTrace(main);
    HandleSeccompTrace(act);
  }
}