cosmopolitan/libc/mem/pledge.c

1251 lines
46 KiB
C
Raw Normal View History

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/strace.internal.h"
2022-06-27 20:01:58 +00:00
#include "libc/calls/struct/bpf.h"
#include "libc/calls/struct/filter.h"
2022-06-27 20:01:58 +00:00
#include "libc/calls/struct/seccomp.h"
2022-05-23 22:06:11 +00:00
#include "libc/calls/syscall-sysv.internal.h"
2022-06-27 22:00:51 +00:00
#include "libc/calls/syscall_support-sysv.internal.h"
#include "libc/dce.h"
#include "libc/intrin/kprintf.h"
2022-07-16 01:07:34 +00:00
#include "libc/intrin/promises.internal.h"
#include "libc/limits.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
2022-06-27 20:01:58 +00:00
#include "libc/runtime/runtime.h"
#include "libc/sock/struct/sockaddr.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/af.h"
2022-06-27 20:01:58 +00:00
#include "libc/sysv/consts/audit.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/nrlinux.h"
2022-06-27 20:01:58 +00:00
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/pr.h"
2022-06-27 20:01:58 +00:00
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/errfuns.h"
2022-06-27 20:01:58 +00:00
#define READONLY 0x8000
#define WRITEONLY 0x4000
#define INET 0x8000
#define UNIX 0x4000
#define ADDRLESS 0x2000
#define LOCK 0x8000
#define TTY 0x8000
2022-06-27 20:01:58 +00:00
#define OFF(f) offsetof(struct seccomp_data, f)
#define PLEDGE(pledge) pledge, ARRAYLEN(pledge)
struct Filter {
size_t n;
struct sock_filter *p;
};
static const uint16_t kPledgeLinuxDefault[] = {
2022-07-14 11:32:33 +00:00
__NR_linux_exit, //
};
static const uint16_t kPledgeLinuxStdio[] = {
2022-07-14 11:32:33 +00:00
__NR_linux_exit_group, //
2022-06-27 20:01:58 +00:00
__NR_linux_clock_getres, //
__NR_linux_clock_gettime, //
__NR_linux_clock_nanosleep, //
2022-06-27 20:01:58 +00:00
__NR_linux_close, //
__NR_linux_write, //
__NR_linux_writev, //
__NR_linux_pwrite, //
__NR_linux_pwritev, //
2022-06-27 22:00:51 +00:00
__NR_linux_pwritev2, //
2022-06-27 20:01:58 +00:00
__NR_linux_read, //
__NR_linux_readv, //
__NR_linux_pread, //
__NR_linux_preadv, //
2022-06-27 22:00:51 +00:00
__NR_linux_preadv2, //
2022-06-27 20:01:58 +00:00
__NR_linux_dup, //
__NR_linux_dup2, //
__NR_linux_dup3, //
__NR_linux_fchdir, //
__NR_linux_fcntl, //
__NR_linux_fstat, //
__NR_linux_fsync, //
__NR_linux_fdatasync, //
__NR_linux_ftruncate, //
__NR_linux_getdents, //
__NR_linux_getegid, //
__NR_linux_getrandom, //
__NR_linux_getgroups, //
__NR_linux_getpgid, //
__NR_linux_getpgrp, //
__NR_linux_getpid, //
__NR_linux_gettid, //
__NR_linux_getuid, //
__NR_linux_getgid, //
__NR_linux_getsid, //
__NR_linux_getppid, //
__NR_linux_geteuid, //
__NR_linux_getrlimit, //
__NR_linux_getresgid, //
__NR_linux_getresuid, //
__NR_linux_getitimer, //
__NR_linux_setitimer, //
__NR_linux_gettimeofday, //
2022-06-27 22:00:51 +00:00
__NR_linux_copy_file_range, //
__NR_linux_splice, //
2022-06-27 20:01:58 +00:00
__NR_linux_lseek, //
2022-06-27 22:00:51 +00:00
__NR_linux_tee, //
2022-06-27 20:01:58 +00:00
__NR_linux_brk, //
__NR_linux_mmap, //
__NR_linux_msync, //
__NR_linux_munmap, //
__NR_linux_madvise, //
2022-06-27 22:00:51 +00:00
__NR_linux_fadvise, //
2022-06-27 20:01:58 +00:00
__NR_linux_mprotect, //
__NR_linux_arch_prctl, //
__NR_linux_set_tid_address, //
__NR_linux_nanosleep, //
__NR_linux_pipe, //
__NR_linux_pipe2, //
__NR_linux_poll, //
__NR_linux_select, //
__NR_linux_recvfrom, //
__NR_linux_sendto | ADDRLESS, //
__NR_linux_ioctl, //
__NR_linux_shutdown, //
__NR_linux_sigaction, //
__NR_linux_sigaltstack, //
__NR_linux_sigprocmask, //
__NR_linux_sigsuspend, //
__NR_linux_sigreturn, //
__NR_linux_socketpair, //
__NR_linux_umask, //
__NR_linux_wait4, //
__NR_linux_uname, //
__NR_linux_prctl, //
2022-06-27 20:01:58 +00:00
};
static const uint16_t kPledgeLinuxFlock[] = {
__NR_linux_flock, //
__NR_linux_fcntl | LOCK, //
};
static const uint16_t kPledgeLinuxRpath[] = {
2022-06-27 20:01:58 +00:00
__NR_linux_chdir, //
__NR_linux_getcwd, //
__NR_linux_open | READONLY, //
__NR_linux_openat | READONLY, //
__NR_linux_stat, //
__NR_linux_lstat, //
__NR_linux_fstat, //
__NR_linux_fstatat, //
__NR_linux_access, //
__NR_linux_faccessat, //
__NR_linux_readlink, //
__NR_linux_readlinkat, //
__NR_linux_statfs, //
__NR_linux_fstatfs, //
};
static const uint16_t kPledgeLinuxWpath[] = {
__NR_linux_getcwd, //
__NR_linux_open | WRITEONLY, //
__NR_linux_openat | WRITEONLY, //
__NR_linux_stat, //
__NR_linux_fstat, //
__NR_linux_lstat, //
__NR_linux_fstatat, //
__NR_linux_access, //
__NR_linux_faccessat, //
__NR_linux_readlinkat, //
__NR_linux_chmod, //
__NR_linux_fchmod, //
__NR_linux_fchmodat, //
};
static const uint16_t kPledgeLinuxCpath[] = {
__NR_linux_open, //
__NR_linux_openat, //
__NR_linux_rename, //
__NR_linux_renameat, //
2022-06-27 22:00:51 +00:00
__NR_linux_renameat2, //
__NR_linux_link, //
__NR_linux_linkat, //
__NR_linux_symlink, //
__NR_linux_symlinkat, //
2022-06-27 20:01:58 +00:00
__NR_linux_rmdir, //
__NR_linux_unlink, //
__NR_linux_unlinkat, //
__NR_linux_mkdir, //
__NR_linux_mkdirat, //
};
static const uint16_t kPledgeLinuxDpath[] = {
__NR_linux_mknod, //
__NR_linux_mknodat, //
};
static const uint16_t kPledgeLinuxFattr[] = {
__NR_linux_chmod, //
__NR_linux_fchmod, //
__NR_linux_fchmodat, //
2022-06-27 20:01:58 +00:00
__NR_linux_utime, //
__NR_linux_utimes, //
2022-06-27 20:01:58 +00:00
__NR_linux_futimesat, //
__NR_linux_utimensat, //
};
static const uint16_t kPledgeLinuxInet[] = {
__NR_linux_socket | INET, //
__NR_linux_listen, //
__NR_linux_bind, //
__NR_linux_sendto, //
__NR_linux_connect, //
__NR_linux_accept, //
__NR_linux_accept4, //
__NR_linux_getsockopt, //
__NR_linux_setsockopt, //
__NR_linux_getpeername, //
__NR_linux_getsockname, //
};
static const uint16_t kPledgeLinuxUnix[] = {
2022-06-27 20:01:58 +00:00
__NR_linux_socket | UNIX, //
__NR_linux_listen, //
__NR_linux_bind, //
__NR_linux_connect, //
__NR_linux_sendto, //
__NR_linux_accept, //
__NR_linux_accept4, //
__NR_linux_getsockopt, //
__NR_linux_setsockopt, //
__NR_linux_getpeername, //
__NR_linux_getsockname, //
};
static const uint16_t kPledgeLinuxDns[] = {
2022-06-27 20:01:58 +00:00
__NR_linux_socket | INET, //
__NR_linux_sendto, //
__NR_linux_connect, //
__NR_linux_recvfrom, //
};
static const uint16_t kPledgeLinuxTty[] = {
__NR_linux_ioctl | TTY, //
};
static const uint16_t kPledgeLinuxRecvfd[] = {
__NR_linux_recvmsg, //
};
static const uint16_t kPledgeLinuxProc[] = {
__NR_linux_fork, //
__NR_linux_vfork, //
2022-06-27 22:00:51 +00:00
__NR_linux_clone, //
__NR_linux_kill, //
2022-06-27 20:01:58 +00:00
__NR_linux_setsid, //
__NR_linux_setpgid, //
__NR_linux_prlimit, //
__NR_linux_setrlimit, //
__NR_linux_getpriority, //
__NR_linux_setpriority, //
};
2022-06-27 20:01:58 +00:00
static const uint16_t kPledgeLinuxThread[] = {
__NR_linux_clone, //
__NR_linux_futex, //
};
static const uint16_t kPledgeLinuxId[] = {
__NR_linux_setuid, //
__NR_linux_setreuid, //
__NR_linux_setresuid, //
__NR_linux_setgid, //
__NR_linux_setregid, //
__NR_linux_setresgid, //
__NR_linux_setgroups, //
2022-06-27 20:01:58 +00:00
__NR_linux_prlimit, //
__NR_linux_setrlimit, //
__NR_linux_getpriority, //
__NR_linux_setpriority, //
__NR_linux_setfsuid, //
__NR_linux_setfsgid, //
};
2022-06-27 20:01:58 +00:00
static const uint16_t kPledgeLinuxExec[] = {
__NR_linux_execve, //
__NR_linux_execveat, //
__NR_linux_access, //
__NR_linux_faccessat, //
__NR_linux_open | READONLY, //
__NR_linux_openat | READONLY, //
};
2022-07-16 01:07:34 +00:00
static const uint16_t kPledgeLinuxExec2[] = {
2022-06-27 22:00:51 +00:00
__NR_linux_execve, //
__NR_linux_execveat, //
2022-06-27 20:01:58 +00:00
};
static const struct Pledges {
const char *name;
const uint16_t *syscalls;
const size_t len;
} kPledgeLinux[] = {
2022-07-16 01:07:34 +00:00
[PROMISE_DEFAULT] = {"default", PLEDGE(kPledgeLinuxDefault)}, //
[PROMISE_STDIO] = {"stdio", PLEDGE(kPledgeLinuxStdio)}, //
[PROMISE_RPATH] = {"rpath", PLEDGE(kPledgeLinuxRpath)}, //
[PROMISE_WPATH] = {"wpath", PLEDGE(kPledgeLinuxWpath)}, //
[PROMISE_CPATH] = {"cpath", PLEDGE(kPledgeLinuxCpath)}, //
[PROMISE_DPATH] = {"dpath", PLEDGE(kPledgeLinuxDpath)}, //
[PROMISE_FLOCK] = {"flock", PLEDGE(kPledgeLinuxFlock)}, //
[PROMISE_FATTR] = {"fattr", PLEDGE(kPledgeLinuxFattr)}, //
[PROMISE_INET] = {"inet", PLEDGE(kPledgeLinuxInet)}, //
[PROMISE_UNIX] = {"unix", PLEDGE(kPledgeLinuxUnix)}, //
[PROMISE_DNS] = {"dns", PLEDGE(kPledgeLinuxDns)}, //
[PROMISE_TTY] = {"tty", PLEDGE(kPledgeLinuxTty)}, //
[PROMISE_RECVFD] = {"recvfd", PLEDGE(kPledgeLinuxRecvfd)}, //
[PROMISE_PROC] = {"proc", PLEDGE(kPledgeLinuxProc)}, //
[PROMISE_THREAD] = {"thread", PLEDGE(kPledgeLinuxThread)}, //
[PROMISE_EXEC] = {"exec", PLEDGE(kPledgeLinuxExec)}, //
[PROMISE_EXECNATIVE] = {"execnative", PLEDGE(kPledgeLinuxExec2)}, //
[PROMISE_ID] = {"id", PLEDGE(kPledgeLinuxId)}, //
[PROMISE_MAX + 1] = {0}, //
};
static const struct sock_filter kFilterStart[] = {
2022-06-27 20:01:58 +00:00
// make sure this isn't an i386 binary or something
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(arch)),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
// each filter assumes ordinal is already loaded into accumulator
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
};
static const struct sock_filter kFilterEnd[] = {
2022-06-27 20:01:58 +00:00
// if syscall isn't whitelisted then have it return -EPERM (-1)
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (1 & SECCOMP_RET_DATA)),
};
static bool AppendFilter(struct Filter *f, struct sock_filter *p, size_t n) {
size_t m;
struct sock_filter *q;
m = f->n + n;
if (!(q = realloc(f->p, m * sizeof(*f->p)))) return false;
memcpy(q + f->n, p, n * sizeof(*q));
f->p = q;
f->n = m;
return true;
}
2022-06-27 20:01:58 +00:00
// SYSCALL is only allowed in the .privileged section
// We assume program image is loaded in 32-bit spaces
2022-06-27 20:01:58 +00:00
static bool AppendOriginVerification(struct Filter *f) {
intptr_t x = (intptr_t)__privileged_start;
intptr_t y = (intptr_t)__privileged_end;
assert(0 < x && x < y && y < INT_MAX);
2022-06-27 20:01:58 +00:00
struct sock_filter fragment[] = {
/*L0*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(instruction_pointer) + 4),
/*L1*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 2),
/*L2*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(instruction_pointer)),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, x, 0, 5 - 4),
/*L4*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, y, 0, 6 - 5),
/*L5*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),
/*L6*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L7*/ /* next filter */
2022-06-27 20:01:58 +00:00
};
return AppendFilter(f, PLEDGE(fragment));
}
// Authorize specific system call w/o considering its arguments.
static bool AllowSyscall(struct Filter *f, uint16_t w) {
struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, w, 0, 2 - 1),
/*L1*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L2*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The second argument of ioctl() must be one of:
//
// - FIONREAD (0x541b)
// - FIONBIO (0x5421)
// - FIOCLEX (0x5451)
// - FIONCLEX (0x5450)
//
static bool AllowIoctl(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_ioctl, 0, 8 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x541b, 6 - 3, 0),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5421, 6 - 4, 0),
/*L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5451, 6 - 5, 0),
/*L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5450, 0, 7 - 6),
/*L6*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L8*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The second argument of ioctl() must be one of:
//
// - TCGETS (0x5401)
// - TCSETS (0x5402)
// - TCSETSW (0x5403)
// - TCSETSF (0x5404)
// - TIOCGWINSZ (0x5413)
// - TIOCSPGRP (0x5410)
// - TIOCGPGRP (0x540f)
// - TIOCSWINSZ (0x5414)
// - TIOCSBRK (0x5427)
// - TCFLSH (0x540b)
2022-06-27 20:01:58 +00:00
//
static bool AllowIoctlTty(struct Filter *f) {
static const struct sock_filter fragment[] = {
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_ioctl, 0, 14 - 1),
2022-06-27 20:01:58 +00:00
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5401, 12 - 3, 0),
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5402, 12 - 4, 0),
/* L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5403, 12 - 5, 0),
/* L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5404, 12 - 6, 0),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5413, 12 - 7, 0),
/* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5410, 12 - 8, 0),
/* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x540f, 12 - 9, 0),
/* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5414, 12 - 10, 0),
/*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x540b, 12 - 11, 0),
/*L11*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5427, 0, 13 - 12),
/*L12*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L13*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L14*/ /* next filter */
2022-06-27 20:01:58 +00:00
};
return AppendFilter(f, PLEDGE(fragment));
}
// The level argument of setsockopt() must be one of:
//
2022-07-14 12:42:24 +00:00
// - SOL_IP (0)
2022-06-27 20:01:58 +00:00
// - SOL_SOCKET (1)
// - SOL_TCP (6)
//
// The optname argument of setsockopt() must be one of:
//
// - SO_TYPE ( 3)
// - SO_REUSEPORT (15)
// - SO_REUSEADDR ( 2)
// - SO_KEEPALIVE ( 9)
// - SO_RCVTIMEO (20)
// - SO_SNDTIMEO (21)
2022-07-14 12:42:24 +00:00
// - IP_RECVTTL (12)
2022-06-27 20:01:58 +00:00
//
static bool AllowSetsockopt(struct Filter *f) {
static const int nr = __NR_linux_setsockopt;
static const struct sock_filter fragment[] = {
2022-07-14 12:42:24 +00:00
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 15 - 1),
2022-06-27 20:01:58 +00:00
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
2022-07-14 12:42:24 +00:00
/* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 5 - 3, 0),
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 5 - 4, 0),
/* L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 6, 0, 14 - 5),
/* L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 3, 13 - 7, 0),
/* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 12, 13 - 8, 0),
/* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 15, 13 - 9, 0),
/* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 13 - 10, 0),
/*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 9, 13 - 11, 0),
/*L11*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 20, 13 - 12, 0),
/*L12*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 21, 0, 14 - 13),
/*L13*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L14*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L15*/ /* next filter */
2022-06-27 20:01:58 +00:00
};
return AppendFilter(f, PLEDGE(fragment));
}
// The level argument of getsockopt() must be one of:
//
// - SOL_SOCKET (1)
// - SOL_TCP (6)
//
// The optname argument of getsockopt() must be one of:
//
// - SO_TYPE ( 3)
// - SO_REUSEPORT (15)
// - SO_REUSEADDR ( 2)
// - SO_KEEPALIVE ( 9)
// - SO_RCVTIMEO (20)
// - SO_SNDTIMEO (21)
//
static bool AllowGetsockopt(struct Filter *f) {
static const int nr = __NR_linux_getsockopt;
static const struct sock_filter fragment[] = {
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 13 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 11 - 3, 0),
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 6, 11 - 4, 0),
/* L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 3, 11 - 6, 0),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 15, 11 - 7, 0),
/* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 11 - 8, 0),
/* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 9, 11 - 9, 0),
/* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 20, 11 - 10, 0),
/*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 21, 0, 12 - 11),
/*L11*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L12*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L13*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The flags parameter of mmap() must not have:
2022-06-27 20:01:58 +00:00
//
// - MAP_LOCKED (0x02000)
// - MAP_NONBLOCK (0x10000)
// - MAP_HUGETLB (0x40000)
//
static bool AllowMmap(struct Filter *f) {
intptr_t y = (intptr_t)__privileged_end;
assert(0 < y && y < INT_MAX);
struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_mmap, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[3])), // flags
2022-07-14 20:04:28 +00:00
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x52000),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
2022-06-27 20:01:58 +00:00
};
return AppendFilter(f, PLEDGE(fragment));
}
// The prot parameter of mmap() may only have:
2022-06-27 20:01:58 +00:00
//
// - PROT_NONE (0)
// - PROT_READ (1)
// - PROT_WRITE (2)
2022-06-27 20:01:58 +00:00
//
// The flags parameter must not have:
//
// - MAP_LOCKED (0x02000)
// - MAP_POPULATE (0x08000)
// - MAP_NONBLOCK (0x10000)
// - MAP_HUGETLB (0x40000)
//
static bool AllowMmapNoexec(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_mmap, 0, 9 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), // prot
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, ~(PROT_READ | PROT_WRITE)),
2022-06-27 20:01:58 +00:00
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 8 - 4),
/*L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[3])), // flags
/*L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x5a000),
/*L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 8 - 7),
/*L7*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L9*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The prot parameter of mprotect() may only have:
2022-06-27 20:01:58 +00:00
//
// - PROT_NONE (0)
// - PROT_READ (1)
// - PROT_WRITE (2)
2022-06-27 20:01:58 +00:00
//
static bool AllowMprotectNoexec(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_mprotect, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), // prot
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, ~(PROT_READ | PROT_WRITE)),
2022-06-27 20:01:58 +00:00
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The open() system call is permitted only when
2022-06-27 20:01:58 +00:00
//
// - (flags & O_ACCMODE) == O_RDONLY
2022-06-27 20:01:58 +00:00
//
static bool AllowOpenReadonly(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_open, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, O_ACCMODE),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_RDONLY, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The flags parameter of open() must not have:
//
// - O_WRONLY (1)
// - O_RDWR (2)
//
static bool AllowOpenatReadonly(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_openat, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, O_ACCMODE),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_RDONLY, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The open() flags parameter must not contain
//
// - O_CREAT (000000100)
// - O_TMPFILE (020200000)
//
static bool AllowOpenWriteonly(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_open, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020200100),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The openat() flags parameter must not contain
//
// - O_CREAT (000000100)
// - O_TMPFILE (020200000)
//
static bool AllowOpenatWriteonly(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_openat, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020200100),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
2022-06-27 20:01:58 +00:00
// If the flags parameter of open() has one of:
//
// - O_CREAT (000000100)
// - O_TMPFILE (020200000)
//
// Then the mode parameter must not have:
//
// - S_ISVTX (01000 sticky)
// - S_ISGID (02000 setgid)
// - S_ISUID (04000 setuid)
//
static bool AllowOpen(struct Filter *f) {
static const struct sock_filter fragment[] = {
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_open, 0, 12 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 000000100),
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 000000100, 7 - 4, 0),
/* L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020200000),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 020200000, 0, 10 - 7),
/* L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L8*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000),
/* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 11 - 10),
/*L10*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L11*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L12*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// If the flags parameter of openat() has one of:
//
// - O_CREAT (000000100)
// - O_TMPFILE (020200000)
//
// Then the mode parameter must not have:
//
// - S_ISVTX (01000 sticky)
// - S_ISGID (02000 setgid)
// - S_ISUID (04000 setuid)
//
static bool AllowOpenat(struct Filter *f) {
static const struct sock_filter fragment[] = {
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_openat, 0, 12 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 000000100),
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 000000100, 7 - 4, 0),
/* L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020200000),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 020200000, 0, 10 - 7),
/* L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[3])),
/* L8*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000),
/* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 11 - 10),
/*L10*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L11*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L12*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The second argument of fcntl() must be one of:
//
// - F_DUPFD (0)
// - F_DUPFD_CLOEXEC (1030)
// - F_GETFD (1)
// - F_SETFD (2)
// - F_GETFL (3)
// - F_SETFL (4)
//
static bool AllowFcntl(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_fcntl, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1030, 4 - 3, 0),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 5, 5 - 4, 0),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The second argument of fcntl() must be one of:
//
// - F_GETLK (5)
// - F_SETLK (6)
// - F_SETLKW (7)
//
static bool AllowFcntlLock(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_fcntl, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 5, 0, 5 - 3),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 8, 5 - 4, 0),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The addr parameter of sendto() must be
//
// - NULL
//
static bool AllowSendtoAddrless(struct Filter *f) {
2022-06-27 20:01:58 +00:00
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_sendto, 0, 7 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[4]) + 0),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 6 - 3),
/*L3*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[4]) + 4),
/*L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 6 - 3),
/*L5*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L6*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L7*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The sig parameter of sigaction() must NOT be
//
// - SIGSYS (31)
//
static bool AllowSigaction(struct Filter *f) {
static const int nr = __NR_linux_sigaction;
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 5 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 31, 4 - 3, 0),
2022-06-27 20:01:58 +00:00
/*L3*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L5*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The family parameter of socket() must be one of:
//
// - AF_INET (2)
2022-06-27 20:01:58 +00:00
// - AF_INET6 (10)
//
// The type parameter of socket() will ignore:
//
// - SOCK_CLOEXEC (0x80000)
// - SOCK_NONBLOCK (0x00800)
//
// The type parameter of socket() must be one of:
//
// - SOCK_STREAM (1)
// - SOCK_DGRAM (2)
//
// The protocol parameter of socket() must be one of:
//
// - 0
2022-07-14 12:42:24 +00:00
// - IPPROTO_ICMP (1)
// - IPPROTO_TCP (6)
// - IPPROTO_UDP (17)
//
2022-06-27 20:01:58 +00:00
static bool AllowSocketInet(struct Filter *f) {
static const struct sock_filter fragment[] = {
2022-07-14 12:42:24 +00:00
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_socket, 0, 15 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),
/* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 4 - 3, 0),
2022-07-14 12:42:24 +00:00
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 10, 0, 14 - 4),
/* L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, ~0x80800),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 8 - 7, 0),
2022-07-14 12:42:24 +00:00
/* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 0, 14 - 8),
/* L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
2022-07-14 12:42:24 +00:00
/* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 13 - 10, 0),
/*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 13 - 11, 0),
/*L11*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 6, 13 - 12, 0),
/*L12*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 17, 0, 14 - 12),
/*L13*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L14*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L15*/ /* next filter */
2022-06-27 20:01:58 +00:00
};
return AppendFilter(f, PLEDGE(fragment));
}
// The family parameter of socket() must be one of:
//
// - AF_UNIX (1)
// - AF_LOCAL (1)
//
// The type parameter of socket() will ignore:
//
// - SOCK_CLOEXEC (0x80000)
// - SOCK_NONBLOCK (0x00800)
//
// The type parameter of socket() must be one of:
//
// - SOCK_STREAM (1)
// - SOCK_DGRAM (2)
//
// The protocol parameter of socket() must be one of:
//
// - 0
//
2022-06-27 20:01:58 +00:00
static bool AllowSocketUnix(struct Filter *f) {
static const struct sock_filter fragment[] = {
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_socket, 0, 11 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),
/* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, 10 - 3),
/* L3*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, ~0x80800),
/* L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 7 - 6, 0),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 0, 10 - 7),
/* L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 10 - 9),
/* L9*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L10*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L11*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The first parameter of prctl() can be any of
//
// - PR_SET_NAME (15)
// - PR_GET_NAME (16)
// - PR_GET_SECCOMP (21)
// - PR_SET_SECCOMP (22)
// - PR_SET_NO_NEW_PRIVS (38)
//
static bool AllowPrctl(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_prctl, 0, 9 - 1),
2022-06-27 20:01:58 +00:00
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 15, 7 - 3, 0),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 16, 7 - 4, 0),
/*L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 21, 7 - 5, 0),
/*L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 7 - 6, 0),
/*L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 38, 0, 8 - 7),
/*L7*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L9*/ /* next filter */
2022-06-27 20:01:58 +00:00
};
return AppendFilter(f, PLEDGE(fragment));
}
// The mode parameter of chmod() can't have the following:
//
// - S_ISVTX (01000 sticky)
// - S_ISGID (02000 setgid)
// - S_ISUID (04000 setuid)
//
static bool AllowChmod(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_chmod, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The mode parameter of fchmod() can't have the following:
//
// - S_ISVTX (01000 sticky)
// - S_ISGID (02000 setgid)
// - S_ISUID (04000 setuid)
//
static bool AllowFchmod(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_fchmod, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The mode parameter of fchmodat() can't have the following:
//
// - S_ISVTX (01000 sticky)
// - S_ISGID (02000 setgid)
// - S_ISUID (04000 setuid)
//
static bool AllowFchmodat(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_fchmodat, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
2022-07-16 01:07:34 +00:00
static bool AppendPledge(struct Filter *f, const uint16_t *p, size_t len, bool needmapexec,
bool needmorphing) {
int i;
for (i = 0; i < len; ++i) {
2022-06-27 20:01:58 +00:00
switch (p[i]) {
case __NR_linux_mmap:
if (needmapexec) {
if (!AllowMmap(f)) return false;
} else {
if (!AllowMmapNoexec(f)) return false;
}
break;
case __NR_linux_mprotect:
if (needmorphing) {
if (!AllowSyscall(f, __NR_linux_mprotect)) return false;
} else {
if (!AllowMprotectNoexec(f)) return false;
}
break;
case __NR_linux_chmod:
if (!AllowChmod(f)) return false;
break;
case __NR_linux_fchmod:
if (!AllowFchmod(f)) return false;
break;
case __NR_linux_fchmodat:
if (!AllowFchmodat(f)) return false;
break;
case __NR_linux_sigaction:
if (!AllowSigaction(f)) return false;
break;
case __NR_linux_prctl:
if (!AllowPrctl(f)) return false;
break;
2022-06-27 20:01:58 +00:00
case __NR_linux_open:
if (!AllowOpen(f)) return false;
break;
case __NR_linux_openat:
if (!AllowOpenat(f)) return false;
break;
case __NR_linux_open | READONLY:
if (!AllowOpenReadonly(f)) return false;
break;
case __NR_linux_openat | READONLY:
if (!AllowOpenatReadonly(f)) return false;
break;
case __NR_linux_open | WRITEONLY:
if (!AllowOpenWriteonly(f)) return false;
break;
case __NR_linux_openat | WRITEONLY:
if (!AllowOpenatWriteonly(f)) return false;
break;
2022-06-27 20:01:58 +00:00
case __NR_linux_setsockopt:
if (!AllowSetsockopt(f)) return false;
break;
case __NR_linux_getsockopt:
if (!AllowGetsockopt(f)) return false;
break;
case __NR_linux_fcntl:
if (!AllowFcntl(f)) return false;
break;
case __NR_linux_fcntl | LOCK:
if (!AllowFcntlLock(f)) return false;
break;
case __NR_linux_ioctl:
if (!AllowIoctl(f)) return false;
break;
case __NR_linux_ioctl | TTY:
if (!AllowIoctlTty(f)) return false;
break;
case __NR_linux_socket | INET:
if (!AllowSocketInet(f)) return false;
break;
case __NR_linux_socket | UNIX:
if (!AllowSocketUnix(f)) return false;
break;
case __NR_linux_sendto | ADDRLESS:
if (!AllowSendtoAddrless(f)) return false;
2022-06-27 20:01:58 +00:00
break;
default:
assert(~p[i] & ~0xfff);
2022-06-27 20:01:58 +00:00
if (!AllowSyscall(f, p[i])) return false;
break;
}
}
return true;
}
2022-07-16 01:07:34 +00:00
static int FindPromise(const struct Pledges *p, const char *name, size_t *len) {
int i;
for (i = 0; p[i].name; ++i) {
if (!strcasecmp(name, p[i].name)) {
*len = p[i].len;
2022-07-16 01:07:34 +00:00
return i;
}
}
2022-07-16 01:07:34 +00:00
return -1;
}
static int sys_pledge_linux(const char *promises, const char *execpromises) {
bool ok;
int rc = -1;
size_t plen;
2022-07-16 01:07:34 +00:00
int promise;
2022-06-27 20:01:58 +00:00
bool needmapexec;
bool needexecnative;
2022-06-27 20:01:58 +00:00
bool needmorphing;
struct Filter f = {0};
const uint16_t *pledge;
2022-07-16 01:07:34 +00:00
unsigned long ipromises = -1;
char *s, *tok, *state, *start;
if (execpromises) return einval();
2022-06-27 20:01:58 +00:00
needmapexec = strstr(promises, "exec");
needmorphing = strstr(promises, "thread");
needexecnative = strstr(promises, "execnative");
2022-07-16 01:07:34 +00:00
if ((start = s = strdup(promises)) && AppendFilter(&f, kFilterStart, ARRAYLEN(kFilterStart)) &&
(needmapexec || needexecnative || AppendOriginVerification(&f)) &&
2022-07-16 01:07:34 +00:00
AppendPledge(&f, kPledgeLinuxDefault, ARRAYLEN(kPledgeLinuxDefault), needmapexec,
needmorphing)) {
for (ok = true; (tok = strtok_r(start, " \t\r\n", &state)); start = 0) {
2022-07-16 01:07:34 +00:00
if ((promise = FindPromise(kPledgeLinux, tok, &plen)) != -1) {
pledge = kPledgeLinux[promise].syscalls;
ipromises &= ~(1ULL << promise);
} else {
ok = false;
rc = einval();
break;
}
2022-06-27 20:01:58 +00:00
if (!AppendPledge(&f, pledge, plen, needmapexec, needmorphing)) {
ok = false;
break;
}
}
if (ok && AppendFilter(&f, kFilterEnd, ARRAYLEN(kFilterEnd)) &&
(rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) != -1) {
struct sock_fprog sandbox = {.len = f.n, .filter = f.p};
rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &sandbox);
}
2022-07-16 01:07:34 +00:00
if (!rc) {
__promises = ipromises;
}
}
free(f.p);
free(s);
return rc;
}
2022-07-16 01:07:34 +00:00
static void SetPromises(const char *promises) {
int promise;
size_t plen;
char *tok, *state, *start;
unsigned long ipromises = -1;
while ((tok = strtok_r(start, " \t\r\n", &state))) {
if ((promise = FindPromise(kPledgeLinux, tok, &plen)) != -1) {
ipromises &= ~(1ULL << promise);
}
start = 0;
}
__promises = ipromises;
}
/**
2022-06-27 20:01:58 +00:00
* Restricts system operations, e.g.
*
* pledge("stdio tty", 0);
*
* Pledging causes most system calls to become unavailable. Your system
* call policy is enforced by the kernel, which means it can propagate
* across execve() if permitted. This system call is supported on
* OpenBSD and Linux where it's polyfilled using SECCOMP BPF. The way it
* works on Linux is verboten system calls will raise EPERM whereas
* OpenBSD just kills the process while logging a helpful message to
* /var/log/messages explaining which promise category you needed.
*
2022-07-16 03:47:20 +00:00
* By default exit() is allowed. This is useful for processes that
* perform pure computation and interface with the parent via shared
* memory. On Linux we mean sys_exit (_Exit1), not sys_exit_group
* (_Exit). The difference is effectively meaningless, since _Exit()
* will attempt both. All it means is that, if you're using threads,
* then a `pledge("", 0)` thread can't kill all your threads unless you
* `pledge("stdio", 0)`.
*
2022-06-27 20:01:58 +00:00
* Once pledge is in effect, the chmod functions (if allowed) will not
* permit the sticky/setuid/setgid bits to change. Linux will EPERM here
* and OpenBSD should ignore those three bits rather than crashing.
*
* User and group IDs can't be changed once pledge is in effect. OpenBSD
* should ignore chown without crashing; whereas Linux will just EPERM.
2022-06-27 20:01:58 +00:00
*
* Memory functions won't permit creating executable code after pledge.
* Restrictions on origin of SYSCALL instructions will become enforced
* on Linux (cf. msyscall) after pledge too, which means the process
* gets killed if SYSCALL is used outside the .privileged section. One
* exception is if the "exec" group is specified, in which case these
* restrictions need to be loosened.
*
* Using pledge is irreversible. On Linux it causes PR_SET_NO_NEW_PRIVS
* to be set on your process; however, if "id" or "recvfd" are allowed
* then then they theoretically could permit the gaining of some new
* privileges. You may call pledge() multiple times if "stdio" is
* allowed. In that case, the process can only move towards a more
* restrictive state.
*
* pledge() can't filter file system paths or internet addresses. For
* example, if you enable a category like "inet" then your process will
* be able to talk to any internet address. The same applies to
* categories like "wpath" and "cpath"; if enabled, any path the
* effective user id is permitted to change will be changeable.
*
* `promises` is a string that may include any of the following groups
* delimited by spaces.
*
2022-07-14 11:32:33 +00:00
* - "stdio" allows exit, close, dup, dup2, dup3, fchdir, fstat, fsync,
* fdatasync, ftruncate, getdents, getegid, getrandom, geteuid,
* getgid, getgroups, getitimer, getpgid, getpgrp, getpid, getppid,
* getresgid, getresuid, getrlimit, getsid, wait4, gettimeofday,
* getuid, lseek, madvise, brk, arch_prctl, uname, set_tid_address,
* clock_getres, clock_gettime, clock_nanosleep, mmap (PROT_EXEC and
* weird flags aren't allowed), mprotect (PROT_EXEC isn't allowed),
* msync, munmap, nanosleep, pipe, pipe2, read, readv, pread, recv,
* poll, recvfrom, preadv, write, writev, pwrite, pwritev, select,
* send, sendto (only if addr is null), setitimer, shutdown, sigaction
* (but SIGSYS is forbidden), sigaltstack, sigprocmask, sigreturn,
* sigsuspend, umask, socketpair, ioctl(FIONREAD), ioctl(FIONBIO),
* ioctl(FIOCLEX), ioctl(FIONCLEX), fcntl(F_GETFD), fcntl(F_SETFD),
* fcntl(F_GETFL), fcntl(F_SETFL).
*
2022-06-27 20:01:58 +00:00
* - "rpath" (read-only path ops) allows chdir, getcwd, open(O_RDONLY),
* openat(O_RDONLY), stat, fstat, lstat, fstatat, access, faccessat,
* readlink, readlinkat, statfs, fstatfs.
*
* - "wpath" (write path ops) allows getcwd, open(O_WRONLY),
* openat(O_WRONLY), stat, fstat, lstat, fstatat, access, faccessat,
* readlink, readlinkat, chmod, fchmod, fchmodat.
*
* - "cpath" (create path ops) allows open(O_CREAT), openat(O_CREAT),
* rename, renameat, renameat2, link, linkat, symlink, symlinkat,
* unlink, rmdir, unlinkat, mkdir, mkdirat.
*
* - "dpath" (create special path ops) allows mknod, mknodat, mkfifo.
*
2022-06-27 20:01:58 +00:00
* - "flock" allows flock, fcntl(F_GETLK), fcntl(F_SETLK),
* fcntl(F_SETLKW).
*
2022-06-27 20:01:58 +00:00
* - "tty" allows ioctl(TIOCGWINSZ), ioctl(TCGETS), ioctl(TCSETS),
* ioctl(TCSETSW), ioctl(TCSETSF).
*
* - "recvfd" allows recvmsg(SCM_RIGHTS).
*
2022-06-27 20:01:58 +00:00
* - "fattr" allows chmod, fchmod, fchmodat, utime, utimes, futimens,
* utimensat.
*
2022-06-27 20:01:58 +00:00
* - "inet" allows socket(AF_INET), listen, bind, connect, accept,
* accept4, getpeername, getsockname, setsockopt, getsockopt, sendto.
*
2022-06-27 20:01:58 +00:00
* - "unix" allows socket(AF_UNIX), listen, bind, connect, accept,
* accept4, getpeername, getsockname, setsockopt, getsockopt.
*
2022-06-27 20:01:58 +00:00
* - "dns" allows socket(AF_INET), sendto, recvfrom, connect.
*
2022-06-27 20:01:58 +00:00
* - "proc" allows fork, vfork, kill, getpriority, setpriority, prlimit,
* setrlimit, setpgid, setsid.
*
2022-06-27 20:01:58 +00:00
* - "thread" allows clone, futex, and permits PROT_EXEC in mprotect.
*
* - "id" allows setuid, setreuid, setresuid, setgid, setregid,
* setresgid, setgroups, prlimit, setrlimit, getpriority, setpriority,
* setfsuid, setfsgid.
2022-06-27 20:01:58 +00:00
*
* - "exec" allows execve, execveat, access, faccessat. On Linux this
* also weakens some security to permit running APE binaries. However
* on OpenBSD they must be assimilate beforehand. On Linux, mmap()
2022-06-27 22:00:51 +00:00
* will be loosened up to allow creating PROT_EXEC memory (for APE
* loader) and system call origin verification won't be activated.
*
* - "execnative" allows execve, execveat. Can only be used to run
* native executables; you won't be able to run APE binaries. mmap()
* and mprotect() are still prevented from creating executable memory.
* System call origin verification can't be enabled. If you always
* assimilate your APE binaries, then this should be preferred.
*
2022-06-27 20:01:58 +00:00
* @return 0 on success, or -1 w/ errno
* @raise ENOSYS if host os isn't Linux or OpenBSD
* @raise EINVAL if `execpromises` is used on Linux
*/
int pledge(const char *promises, const char *execpromises) {
int rc;
if (IsLinux()) {
rc = sys_pledge_linux(promises, execpromises);
} else {
rc = sys_pledge(promises, execpromises);
2022-07-16 01:07:34 +00:00
if (!rc) {
SetPromises(promises);
}
}
STRACE("pledge(%#s, %#s) → %d% m", promises, execpromises, rc);
return rc;
}