cosmopolitan/libc/mem/pledge.c
2022-07-15 20:47:20 -07:00

1250 lines
46 KiB
C

/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2022 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/assert.h"
#include "libc/calls/calls.h"
#include "libc/calls/strace.internal.h"
#include "libc/calls/struct/bpf.h"
#include "libc/calls/struct/filter.h"
#include "libc/calls/struct/seccomp.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/calls/syscall_support-sysv.internal.h"
#include "libc/dce.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/promises.internal.h"
#include "libc/limits.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
#include "libc/runtime/runtime.h"
#include "libc/sock/struct/sockaddr.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/af.h"
#include "libc/sysv/consts/audit.h"
#include "libc/sysv/consts/f.h"
#include "libc/sysv/consts/map.h"
#include "libc/sysv/consts/nrlinux.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/pr.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/errfuns.h"
#define READONLY 0x8000
#define WRITEONLY 0x4000
#define INET 0x8000
#define UNIX 0x4000
#define ADDRLESS 0x2000
#define LOCK 0x8000
#define TTY 0x8000
#define OFF(f) offsetof(struct seccomp_data, f)
#define PLEDGE(pledge) pledge, ARRAYLEN(pledge)
struct Filter {
size_t n;
struct sock_filter *p;
};
static const uint16_t kPledgeLinuxDefault[] = {
__NR_linux_exit, //
};
static const uint16_t kPledgeLinuxStdio[] = {
__NR_linux_exit_group, //
__NR_linux_clock_getres, //
__NR_linux_clock_gettime, //
__NR_linux_clock_nanosleep, //
__NR_linux_close, //
__NR_linux_write, //
__NR_linux_writev, //
__NR_linux_pwrite, //
__NR_linux_pwritev, //
__NR_linux_pwritev2, //
__NR_linux_read, //
__NR_linux_readv, //
__NR_linux_pread, //
__NR_linux_preadv, //
__NR_linux_preadv2, //
__NR_linux_dup, //
__NR_linux_dup2, //
__NR_linux_dup3, //
__NR_linux_fchdir, //
__NR_linux_fcntl, //
__NR_linux_fstat, //
__NR_linux_fsync, //
__NR_linux_fdatasync, //
__NR_linux_ftruncate, //
__NR_linux_getdents, //
__NR_linux_getegid, //
__NR_linux_getrandom, //
__NR_linux_getgroups, //
__NR_linux_getpgid, //
__NR_linux_getpgrp, //
__NR_linux_getpid, //
__NR_linux_gettid, //
__NR_linux_getuid, //
__NR_linux_getgid, //
__NR_linux_getsid, //
__NR_linux_getppid, //
__NR_linux_geteuid, //
__NR_linux_getrlimit, //
__NR_linux_getresgid, //
__NR_linux_getresuid, //
__NR_linux_getitimer, //
__NR_linux_setitimer, //
__NR_linux_gettimeofday, //
__NR_linux_copy_file_range, //
__NR_linux_splice, //
__NR_linux_lseek, //
__NR_linux_tee, //
__NR_linux_brk, //
__NR_linux_mmap, //
__NR_linux_msync, //
__NR_linux_munmap, //
__NR_linux_madvise, //
__NR_linux_fadvise, //
__NR_linux_mprotect, //
__NR_linux_arch_prctl, //
__NR_linux_set_tid_address, //
__NR_linux_nanosleep, //
__NR_linux_pipe, //
__NR_linux_pipe2, //
__NR_linux_poll, //
__NR_linux_select, //
__NR_linux_recvfrom, //
__NR_linux_sendto | ADDRLESS, //
__NR_linux_ioctl, //
__NR_linux_shutdown, //
__NR_linux_sigaction, //
__NR_linux_sigaltstack, //
__NR_linux_sigprocmask, //
__NR_linux_sigsuspend, //
__NR_linux_sigreturn, //
__NR_linux_socketpair, //
__NR_linux_umask, //
__NR_linux_wait4, //
__NR_linux_uname, //
__NR_linux_prctl, //
};
static const uint16_t kPledgeLinuxFlock[] = {
__NR_linux_flock, //
__NR_linux_fcntl | LOCK, //
};
static const uint16_t kPledgeLinuxRpath[] = {
__NR_linux_chdir, //
__NR_linux_getcwd, //
__NR_linux_open | READONLY, //
__NR_linux_openat | READONLY, //
__NR_linux_stat, //
__NR_linux_lstat, //
__NR_linux_fstat, //
__NR_linux_fstatat, //
__NR_linux_access, //
__NR_linux_faccessat, //
__NR_linux_readlink, //
__NR_linux_readlinkat, //
__NR_linux_statfs, //
__NR_linux_fstatfs, //
};
static const uint16_t kPledgeLinuxWpath[] = {
__NR_linux_getcwd, //
__NR_linux_open | WRITEONLY, //
__NR_linux_openat | WRITEONLY, //
__NR_linux_stat, //
__NR_linux_fstat, //
__NR_linux_lstat, //
__NR_linux_fstatat, //
__NR_linux_access, //
__NR_linux_faccessat, //
__NR_linux_readlinkat, //
__NR_linux_chmod, //
__NR_linux_fchmod, //
__NR_linux_fchmodat, //
};
static const uint16_t kPledgeLinuxCpath[] = {
__NR_linux_open, //
__NR_linux_openat, //
__NR_linux_rename, //
__NR_linux_renameat, //
__NR_linux_renameat2, //
__NR_linux_link, //
__NR_linux_linkat, //
__NR_linux_symlink, //
__NR_linux_symlinkat, //
__NR_linux_rmdir, //
__NR_linux_unlink, //
__NR_linux_unlinkat, //
__NR_linux_mkdir, //
__NR_linux_mkdirat, //
};
static const uint16_t kPledgeLinuxDpath[] = {
__NR_linux_mknod, //
__NR_linux_mknodat, //
};
static const uint16_t kPledgeLinuxFattr[] = {
__NR_linux_chmod, //
__NR_linux_fchmod, //
__NR_linux_fchmodat, //
__NR_linux_utime, //
__NR_linux_utimes, //
__NR_linux_futimesat, //
__NR_linux_utimensat, //
};
static const uint16_t kPledgeLinuxInet[] = {
__NR_linux_socket | INET, //
__NR_linux_listen, //
__NR_linux_bind, //
__NR_linux_sendto, //
__NR_linux_connect, //
__NR_linux_accept, //
__NR_linux_accept4, //
__NR_linux_getsockopt, //
__NR_linux_setsockopt, //
__NR_linux_getpeername, //
__NR_linux_getsockname, //
};
static const uint16_t kPledgeLinuxUnix[] = {
__NR_linux_socket | UNIX, //
__NR_linux_listen, //
__NR_linux_bind, //
__NR_linux_connect, //
__NR_linux_sendto, //
__NR_linux_accept, //
__NR_linux_accept4, //
__NR_linux_getsockopt, //
__NR_linux_setsockopt, //
__NR_linux_getpeername, //
__NR_linux_getsockname, //
};
static const uint16_t kPledgeLinuxDns[] = {
__NR_linux_socket | INET, //
__NR_linux_sendto, //
__NR_linux_connect, //
__NR_linux_recvfrom, //
};
static const uint16_t kPledgeLinuxTty[] = {
__NR_linux_ioctl | TTY, //
};
static const uint16_t kPledgeLinuxRecvfd[] = {
__NR_linux_recvmsg, //
};
static const uint16_t kPledgeLinuxProc[] = {
__NR_linux_fork, //
__NR_linux_vfork, //
__NR_linux_clone, //
__NR_linux_kill, //
__NR_linux_setsid, //
__NR_linux_setpgid, //
__NR_linux_prlimit, //
__NR_linux_setrlimit, //
__NR_linux_getpriority, //
__NR_linux_setpriority, //
};
static const uint16_t kPledgeLinuxThread[] = {
__NR_linux_clone, //
__NR_linux_futex, //
};
static const uint16_t kPledgeLinuxId[] = {
__NR_linux_setuid, //
__NR_linux_setreuid, //
__NR_linux_setresuid, //
__NR_linux_setgid, //
__NR_linux_setregid, //
__NR_linux_setresgid, //
__NR_linux_setgroups, //
__NR_linux_prlimit, //
__NR_linux_setrlimit, //
__NR_linux_getpriority, //
__NR_linux_setpriority, //
__NR_linux_setfsuid, //
__NR_linux_setfsgid, //
};
static const uint16_t kPledgeLinuxExec[] = {
__NR_linux_execve, //
__NR_linux_execveat, //
__NR_linux_access, //
__NR_linux_faccessat, //
__NR_linux_open | READONLY, //
__NR_linux_openat | READONLY, //
};
static const uint16_t kPledgeLinuxExec2[] = {
__NR_linux_execve, //
__NR_linux_execveat, //
};
static const struct Pledges {
const char *name;
const uint16_t *syscalls;
const size_t len;
} kPledgeLinux[] = {
[PROMISE_DEFAULT] = {"default", PLEDGE(kPledgeLinuxDefault)}, //
[PROMISE_STDIO] = {"stdio", PLEDGE(kPledgeLinuxStdio)}, //
[PROMISE_RPATH] = {"rpath", PLEDGE(kPledgeLinuxRpath)}, //
[PROMISE_WPATH] = {"wpath", PLEDGE(kPledgeLinuxWpath)}, //
[PROMISE_CPATH] = {"cpath", PLEDGE(kPledgeLinuxCpath)}, //
[PROMISE_DPATH] = {"dpath", PLEDGE(kPledgeLinuxDpath)}, //
[PROMISE_FLOCK] = {"flock", PLEDGE(kPledgeLinuxFlock)}, //
[PROMISE_FATTR] = {"fattr", PLEDGE(kPledgeLinuxFattr)}, //
[PROMISE_INET] = {"inet", PLEDGE(kPledgeLinuxInet)}, //
[PROMISE_UNIX] = {"unix", PLEDGE(kPledgeLinuxUnix)}, //
[PROMISE_DNS] = {"dns", PLEDGE(kPledgeLinuxDns)}, //
[PROMISE_TTY] = {"tty", PLEDGE(kPledgeLinuxTty)}, //
[PROMISE_RECVFD] = {"recvfd", PLEDGE(kPledgeLinuxRecvfd)}, //
[PROMISE_PROC] = {"proc", PLEDGE(kPledgeLinuxProc)}, //
[PROMISE_THREAD] = {"thread", PLEDGE(kPledgeLinuxThread)}, //
[PROMISE_EXEC] = {"exec", PLEDGE(kPledgeLinuxExec)}, //
[PROMISE_EXECNATIVE] = {"execnative", PLEDGE(kPledgeLinuxExec2)}, //
[PROMISE_ID] = {"id", PLEDGE(kPledgeLinuxId)}, //
[PROMISE_MAX + 1] = {0}, //
};
static const struct sock_filter kFilterStart[] = {
// make sure this isn't an i386 binary or something
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(arch)),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
// each filter assumes ordinal is already loaded into accumulator
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
};
static const struct sock_filter kFilterEnd[] = {
// if syscall isn't whitelisted then have it return -EPERM (-1)
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (1 & SECCOMP_RET_DATA)),
};
static bool AppendFilter(struct Filter *f, struct sock_filter *p, size_t n) {
size_t m;
struct sock_filter *q;
m = f->n + n;
if (!(q = realloc(f->p, m * sizeof(*f->p)))) return false;
memcpy(q + f->n, p, n * sizeof(*q));
f->p = q;
f->n = m;
return true;
}
// SYSCALL is only allowed in the .privileged section
// We assume program image is loaded in 32-bit spaces
static bool AppendOriginVerification(struct Filter *f) {
intptr_t x = (intptr_t)__privileged_start;
intptr_t y = (intptr_t)__privileged_end;
assert(0 < x && x < y && y < INT_MAX);
struct sock_filter fragment[] = {
/*L0*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(instruction_pointer) + 4),
/*L1*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 2),
/*L2*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(instruction_pointer)),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, x, 0, 5 - 4),
/*L4*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, y, 0, 6 - 5),
/*L5*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL),
/*L6*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L7*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// Authorize specific system call w/o considering its arguments.
static bool AllowSyscall(struct Filter *f, uint16_t w) {
struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, w, 0, 2 - 1),
/*L1*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L2*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The second argument of ioctl() must be one of:
//
// - FIONREAD (0x541b)
// - FIONBIO (0x5421)
// - FIOCLEX (0x5451)
// - FIONCLEX (0x5450)
//
static bool AllowIoctl(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_ioctl, 0, 8 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x541b, 6 - 3, 0),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5421, 6 - 4, 0),
/*L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5451, 6 - 5, 0),
/*L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5450, 0, 7 - 6),
/*L6*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L8*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The second argument of ioctl() must be one of:
//
// - TCGETS (0x5401)
// - TCSETS (0x5402)
// - TCSETSW (0x5403)
// - TCSETSF (0x5404)
// - TIOCGWINSZ (0x5413)
// - TIOCSPGRP (0x5410)
// - TIOCGPGRP (0x540f)
// - TIOCSWINSZ (0x5414)
// - TIOCSBRK (0x5427)
// - TCFLSH (0x540b)
//
static bool AllowIoctlTty(struct Filter *f) {
static const struct sock_filter fragment[] = {
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_ioctl, 0, 14 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5401, 12 - 3, 0),
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5402, 12 - 4, 0),
/* L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5403, 12 - 5, 0),
/* L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5404, 12 - 6, 0),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5413, 12 - 7, 0),
/* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5410, 12 - 8, 0),
/* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x540f, 12 - 9, 0),
/* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5414, 12 - 10, 0),
/*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x540b, 12 - 11, 0),
/*L11*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5427, 0, 13 - 12),
/*L12*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L13*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L14*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The level argument of setsockopt() must be one of:
//
// - SOL_IP (0)
// - SOL_SOCKET (1)
// - SOL_TCP (6)
//
// The optname argument of setsockopt() must be one of:
//
// - SO_TYPE ( 3)
// - SO_REUSEPORT (15)
// - SO_REUSEADDR ( 2)
// - SO_KEEPALIVE ( 9)
// - SO_RCVTIMEO (20)
// - SO_SNDTIMEO (21)
// - IP_RECVTTL (12)
//
static bool AllowSetsockopt(struct Filter *f) {
static const int nr = __NR_linux_setsockopt;
static const struct sock_filter fragment[] = {
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 15 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 5 - 3, 0),
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 5 - 4, 0),
/* L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 6, 0, 14 - 5),
/* L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 3, 13 - 7, 0),
/* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 12, 13 - 8, 0),
/* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 15, 13 - 9, 0),
/* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 13 - 10, 0),
/*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 9, 13 - 11, 0),
/*L11*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 20, 13 - 12, 0),
/*L12*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 21, 0, 14 - 13),
/*L13*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L14*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L15*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The level argument of getsockopt() must be one of:
//
// - SOL_SOCKET (1)
// - SOL_TCP (6)
//
// The optname argument of getsockopt() must be one of:
//
// - SO_TYPE ( 3)
// - SO_REUSEPORT (15)
// - SO_REUSEADDR ( 2)
// - SO_KEEPALIVE ( 9)
// - SO_RCVTIMEO (20)
// - SO_SNDTIMEO (21)
//
static bool AllowGetsockopt(struct Filter *f) {
static const int nr = __NR_linux_getsockopt;
static const struct sock_filter fragment[] = {
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 13 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 11 - 3, 0),
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 6, 11 - 4, 0),
/* L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 3, 11 - 6, 0),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 15, 11 - 7, 0),
/* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 11 - 8, 0),
/* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 9, 11 - 9, 0),
/* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 20, 11 - 10, 0),
/*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 21, 0, 12 - 11),
/*L11*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L12*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L13*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The flags parameter of mmap() must not have:
//
// - MAP_LOCKED (0x02000)
// - MAP_NONBLOCK (0x10000)
// - MAP_HUGETLB (0x40000)
//
static bool AllowMmap(struct Filter *f) {
intptr_t y = (intptr_t)__privileged_end;
assert(0 < y && y < INT_MAX);
struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_mmap, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[3])), // flags
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x52000),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The prot parameter of mmap() may only have:
//
// - PROT_NONE (0)
// - PROT_READ (1)
// - PROT_WRITE (2)
//
// The flags parameter must not have:
//
// - MAP_LOCKED (0x02000)
// - MAP_POPULATE (0x08000)
// - MAP_NONBLOCK (0x10000)
// - MAP_HUGETLB (0x40000)
//
static bool AllowMmapNoexec(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_mmap, 0, 9 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), // prot
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, ~(PROT_READ | PROT_WRITE)),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 8 - 4),
/*L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[3])), // flags
/*L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x5a000),
/*L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 8 - 7),
/*L7*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L9*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The prot parameter of mprotect() may only have:
//
// - PROT_NONE (0)
// - PROT_READ (1)
// - PROT_WRITE (2)
//
static bool AllowMprotectNoexec(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_mprotect, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), // prot
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, ~(PROT_READ | PROT_WRITE)),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The open() system call is permitted only when
//
// - (flags & O_ACCMODE) == O_RDONLY
//
static bool AllowOpenReadonly(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_open, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, O_ACCMODE),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_RDONLY, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The flags parameter of open() must not have:
//
// - O_WRONLY (1)
// - O_RDWR (2)
//
static bool AllowOpenatReadonly(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_openat, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, O_ACCMODE),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_RDONLY, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The open() flags parameter must not contain
//
// - O_CREAT (000000100)
// - O_TMPFILE (020200000)
//
static bool AllowOpenWriteonly(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_open, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020200100),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The openat() flags parameter must not contain
//
// - O_CREAT (000000100)
// - O_TMPFILE (020200000)
//
static bool AllowOpenatWriteonly(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_openat, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020200100),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// If the flags parameter of open() has one of:
//
// - O_CREAT (000000100)
// - O_TMPFILE (020200000)
//
// Then the mode parameter must not have:
//
// - S_ISVTX (01000 sticky)
// - S_ISGID (02000 setgid)
// - S_ISUID (04000 setuid)
//
static bool AllowOpen(struct Filter *f) {
static const struct sock_filter fragment[] = {
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_open, 0, 12 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 000000100),
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 000000100, 7 - 4, 0),
/* L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020200000),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 020200000, 0, 10 - 7),
/* L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L8*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000),
/* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 11 - 10),
/*L10*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L11*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L12*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// If the flags parameter of openat() has one of:
//
// - O_CREAT (000000100)
// - O_TMPFILE (020200000)
//
// Then the mode parameter must not have:
//
// - S_ISVTX (01000 sticky)
// - S_ISGID (02000 setgid)
// - S_ISUID (04000 setuid)
//
static bool AllowOpenat(struct Filter *f) {
static const struct sock_filter fragment[] = {
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_openat, 0, 12 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 000000100),
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 000000100, 7 - 4, 0),
/* L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020200000),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 020200000, 0, 10 - 7),
/* L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[3])),
/* L8*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000),
/* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 11 - 10),
/*L10*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L11*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L12*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The second argument of fcntl() must be one of:
//
// - F_DUPFD (0)
// - F_DUPFD_CLOEXEC (1030)
// - F_GETFD (1)
// - F_SETFD (2)
// - F_GETFL (3)
// - F_SETFL (4)
//
static bool AllowFcntl(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_fcntl, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1030, 4 - 3, 0),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 5, 5 - 4, 0),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The second argument of fcntl() must be one of:
//
// - F_GETLK (5)
// - F_SETLK (6)
// - F_SETLKW (7)
//
static bool AllowFcntlLock(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_fcntl, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 5, 0, 5 - 3),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 8, 5 - 4, 0),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The addr parameter of sendto() must be
//
// - NULL
//
static bool AllowSendtoAddrless(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_sendto, 0, 7 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[4]) + 0),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 6 - 3),
/*L3*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[4]) + 4),
/*L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 6 - 3),
/*L5*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L6*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L7*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The sig parameter of sigaction() must NOT be
//
// - SIGSYS (31)
//
static bool AllowSigaction(struct Filter *f) {
static const int nr = __NR_linux_sigaction;
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 5 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 31, 4 - 3, 0),
/*L3*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L5*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The family parameter of socket() must be one of:
//
// - AF_INET (2)
// - AF_INET6 (10)
//
// The type parameter of socket() will ignore:
//
// - SOCK_CLOEXEC (0x80000)
// - SOCK_NONBLOCK (0x00800)
//
// The type parameter of socket() must be one of:
//
// - SOCK_STREAM (1)
// - SOCK_DGRAM (2)
//
// The protocol parameter of socket() must be one of:
//
// - 0
// - IPPROTO_ICMP (1)
// - IPPROTO_TCP (6)
// - IPPROTO_UDP (17)
//
static bool AllowSocketInet(struct Filter *f) {
static const struct sock_filter fragment[] = {
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_socket, 0, 15 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),
/* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 4 - 3, 0),
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 10, 0, 14 - 4),
/* L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, ~0x80800),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 8 - 7, 0),
/* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 0, 14 - 8),
/* L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 13 - 10, 0),
/*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 13 - 11, 0),
/*L11*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 6, 13 - 12, 0),
/*L12*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 17, 0, 14 - 12),
/*L13*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L14*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L15*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The family parameter of socket() must be one of:
//
// - AF_UNIX (1)
// - AF_LOCAL (1)
//
// The type parameter of socket() will ignore:
//
// - SOCK_CLOEXEC (0x80000)
// - SOCK_NONBLOCK (0x00800)
//
// The type parameter of socket() must be one of:
//
// - SOCK_STREAM (1)
// - SOCK_DGRAM (2)
//
// The protocol parameter of socket() must be one of:
//
// - 0
//
static bool AllowSocketUnix(struct Filter *f) {
static const struct sock_filter fragment[] = {
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_socket, 0, 11 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),
/* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, 10 - 3),
/* L3*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/* L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, ~0x80800),
/* L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 7 - 6, 0),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 0, 10 - 7),
/* L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 10 - 9),
/* L9*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L10*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L11*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The first parameter of prctl() can be any of
//
// - PR_SET_NAME (15)
// - PR_GET_NAME (16)
// - PR_GET_SECCOMP (21)
// - PR_SET_SECCOMP (22)
// - PR_SET_NO_NEW_PRIVS (38)
//
static bool AllowPrctl(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_prctl, 0, 9 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 15, 7 - 3, 0),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 16, 7 - 4, 0),
/*L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 21, 7 - 5, 0),
/*L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 7 - 6, 0),
/*L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 38, 0, 8 - 7),
/*L7*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L9*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The mode parameter of chmod() can't have the following:
//
// - S_ISVTX (01000 sticky)
// - S_ISGID (02000 setgid)
// - S_ISUID (04000 setuid)
//
static bool AllowChmod(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_chmod, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The mode parameter of fchmod() can't have the following:
//
// - S_ISVTX (01000 sticky)
// - S_ISGID (02000 setgid)
// - S_ISUID (04000 setuid)
//
static bool AllowFchmod(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_fchmod, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
// The mode parameter of fchmodat() can't have the following:
//
// - S_ISVTX (01000 sticky)
// - S_ISGID (02000 setgid)
// - S_ISUID (04000 setuid)
//
static bool AllowFchmodat(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_fchmodat, 0, 6 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])),
/*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4),
/*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L6*/ /* next filter */
};
return AppendFilter(f, PLEDGE(fragment));
}
static bool AppendPledge(struct Filter *f, const uint16_t *p, size_t len, bool needmapexec,
bool needmorphing) {
int i;
for (i = 0; i < len; ++i) {
switch (p[i]) {
case __NR_linux_mmap:
if (needmapexec) {
if (!AllowMmap(f)) return false;
} else {
if (!AllowMmapNoexec(f)) return false;
}
break;
case __NR_linux_mprotect:
if (needmorphing) {
if (!AllowSyscall(f, __NR_linux_mprotect)) return false;
} else {
if (!AllowMprotectNoexec(f)) return false;
}
break;
case __NR_linux_chmod:
if (!AllowChmod(f)) return false;
break;
case __NR_linux_fchmod:
if (!AllowFchmod(f)) return false;
break;
case __NR_linux_fchmodat:
if (!AllowFchmodat(f)) return false;
break;
case __NR_linux_sigaction:
if (!AllowSigaction(f)) return false;
break;
case __NR_linux_prctl:
if (!AllowPrctl(f)) return false;
break;
case __NR_linux_open:
if (!AllowOpen(f)) return false;
break;
case __NR_linux_openat:
if (!AllowOpenat(f)) return false;
break;
case __NR_linux_open | READONLY:
if (!AllowOpenReadonly(f)) return false;
break;
case __NR_linux_openat | READONLY:
if (!AllowOpenatReadonly(f)) return false;
break;
case __NR_linux_open | WRITEONLY:
if (!AllowOpenWriteonly(f)) return false;
break;
case __NR_linux_openat | WRITEONLY:
if (!AllowOpenatWriteonly(f)) return false;
break;
case __NR_linux_setsockopt:
if (!AllowSetsockopt(f)) return false;
break;
case __NR_linux_getsockopt:
if (!AllowGetsockopt(f)) return false;
break;
case __NR_linux_fcntl:
if (!AllowFcntl(f)) return false;
break;
case __NR_linux_fcntl | LOCK:
if (!AllowFcntlLock(f)) return false;
break;
case __NR_linux_ioctl:
if (!AllowIoctl(f)) return false;
break;
case __NR_linux_ioctl | TTY:
if (!AllowIoctlTty(f)) return false;
break;
case __NR_linux_socket | INET:
if (!AllowSocketInet(f)) return false;
break;
case __NR_linux_socket | UNIX:
if (!AllowSocketUnix(f)) return false;
break;
case __NR_linux_sendto | ADDRLESS:
if (!AllowSendtoAddrless(f)) return false;
break;
default:
assert(~p[i] & ~0xfff);
if (!AllowSyscall(f, p[i])) return false;
break;
}
}
return true;
}
static int FindPromise(const struct Pledges *p, const char *name, size_t *len) {
int i;
for (i = 0; p[i].name; ++i) {
if (!strcasecmp(name, p[i].name)) {
*len = p[i].len;
return i;
}
}
return -1;
}
static int sys_pledge_linux(const char *promises, const char *execpromises) {
bool ok;
int rc = -1;
size_t plen;
int promise;
bool needmapexec;
bool needexecnative;
bool needmorphing;
struct Filter f = {0};
const uint16_t *pledge;
unsigned long ipromises = -1;
char *s, *tok, *state, *start;
if (execpromises) return einval();
needmapexec = strstr(promises, "exec");
needmorphing = strstr(promises, "thread");
needexecnative = strstr(promises, "execnative");
if ((start = s = strdup(promises)) && AppendFilter(&f, kFilterStart, ARRAYLEN(kFilterStart)) &&
(needmapexec || needexecnative || AppendOriginVerification(&f)) &&
AppendPledge(&f, kPledgeLinuxDefault, ARRAYLEN(kPledgeLinuxDefault), needmapexec,
needmorphing)) {
for (ok = true; (tok = strtok_r(start, " \t\r\n", &state)); start = 0) {
if ((promise = FindPromise(kPledgeLinux, tok, &plen)) != -1) {
pledge = kPledgeLinux[promise].syscalls;
ipromises &= ~(1ULL << promise);
} else {
ok = false;
rc = einval();
break;
}
if (!AppendPledge(&f, pledge, plen, needmapexec, needmorphing)) {
ok = false;
break;
}
}
if (ok && AppendFilter(&f, kFilterEnd, ARRAYLEN(kFilterEnd)) &&
(rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) != -1) {
struct sock_fprog sandbox = {.len = f.n, .filter = f.p};
rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &sandbox);
}
if (!rc) {
__promises = ipromises;
}
}
free(f.p);
free(s);
return rc;
}
static void SetPromises(const char *promises) {
int promise;
size_t plen;
char *tok, *state, *start;
unsigned long ipromises = -1;
while ((tok = strtok_r(start, " \t\r\n", &state))) {
if ((promise = FindPromise(kPledgeLinux, tok, &plen)) != -1) {
ipromises &= ~(1ULL << promise);
}
start = 0;
}
__promises = ipromises;
}
/**
* Restricts system operations, e.g.
*
* pledge("stdio tty", 0);
*
* Pledging causes most system calls to become unavailable. Your system
* call policy is enforced by the kernel, which means it can propagate
* across execve() if permitted. This system call is supported on
* OpenBSD and Linux where it's polyfilled using SECCOMP BPF. The way it
* works on Linux is verboten system calls will raise EPERM whereas
* OpenBSD just kills the process while logging a helpful message to
* /var/log/messages explaining which promise category you needed.
*
* By default exit() is allowed. This is useful for processes that
* perform pure computation and interface with the parent via shared
* memory. On Linux we mean sys_exit (_Exit1), not sys_exit_group
* (_Exit). The difference is effectively meaningless, since _Exit()
* will attempt both. All it means is that, if you're using threads,
* then a `pledge("", 0)` thread can't kill all your threads unless you
* `pledge("stdio", 0)`.
*
* Once pledge is in effect, the chmod functions (if allowed) will not
* permit the sticky/setuid/setgid bits to change. Linux will EPERM here
* and OpenBSD should ignore those three bits rather than crashing.
*
* User and group IDs can't be changed once pledge is in effect. OpenBSD
* should ignore chown without crashing; whereas Linux will just EPERM.
*
* Memory functions won't permit creating executable code after pledge.
* Restrictions on origin of SYSCALL instructions will become enforced
* on Linux (cf. msyscall) after pledge too, which means the process
* gets killed if SYSCALL is used outside the .privileged section. One
* exception is if the "exec" group is specified, in which case these
* restrictions need to be loosened.
*
* Using pledge is irreversible. On Linux it causes PR_SET_NO_NEW_PRIVS
* to be set on your process; however, if "id" or "recvfd" are allowed
* then then they theoretically could permit the gaining of some new
* privileges. You may call pledge() multiple times if "stdio" is
* allowed. In that case, the process can only move towards a more
* restrictive state.
*
* pledge() can't filter file system paths or internet addresses. For
* example, if you enable a category like "inet" then your process will
* be able to talk to any internet address. The same applies to
* categories like "wpath" and "cpath"; if enabled, any path the
* effective user id is permitted to change will be changeable.
*
* `promises` is a string that may include any of the following groups
* delimited by spaces.
*
* - "stdio" allows exit, close, dup, dup2, dup3, fchdir, fstat, fsync,
* fdatasync, ftruncate, getdents, getegid, getrandom, geteuid,
* getgid, getgroups, getitimer, getpgid, getpgrp, getpid, getppid,
* getresgid, getresuid, getrlimit, getsid, wait4, gettimeofday,
* getuid, lseek, madvise, brk, arch_prctl, uname, set_tid_address,
* clock_getres, clock_gettime, clock_nanosleep, mmap (PROT_EXEC and
* weird flags aren't allowed), mprotect (PROT_EXEC isn't allowed),
* msync, munmap, nanosleep, pipe, pipe2, read, readv, pread, recv,
* poll, recvfrom, preadv, write, writev, pwrite, pwritev, select,
* send, sendto (only if addr is null), setitimer, shutdown, sigaction
* (but SIGSYS is forbidden), sigaltstack, sigprocmask, sigreturn,
* sigsuspend, umask, socketpair, ioctl(FIONREAD), ioctl(FIONBIO),
* ioctl(FIOCLEX), ioctl(FIONCLEX), fcntl(F_GETFD), fcntl(F_SETFD),
* fcntl(F_GETFL), fcntl(F_SETFL).
*
* - "rpath" (read-only path ops) allows chdir, getcwd, open(O_RDONLY),
* openat(O_RDONLY), stat, fstat, lstat, fstatat, access, faccessat,
* readlink, readlinkat, statfs, fstatfs.
*
* - "wpath" (write path ops) allows getcwd, open(O_WRONLY),
* openat(O_WRONLY), stat, fstat, lstat, fstatat, access, faccessat,
* readlink, readlinkat, chmod, fchmod, fchmodat.
*
* - "cpath" (create path ops) allows open(O_CREAT), openat(O_CREAT),
* rename, renameat, renameat2, link, linkat, symlink, symlinkat,
* unlink, rmdir, unlinkat, mkdir, mkdirat.
*
* - "dpath" (create special path ops) allows mknod, mknodat, mkfifo.
*
* - "flock" allows flock, fcntl(F_GETLK), fcntl(F_SETLK),
* fcntl(F_SETLKW).
*
* - "tty" allows ioctl(TIOCGWINSZ), ioctl(TCGETS), ioctl(TCSETS),
* ioctl(TCSETSW), ioctl(TCSETSF).
*
* - "recvfd" allows recvmsg(SCM_RIGHTS).
*
* - "fattr" allows chmod, fchmod, fchmodat, utime, utimes, futimens,
* utimensat.
*
* - "inet" allows socket(AF_INET), listen, bind, connect, accept,
* accept4, getpeername, getsockname, setsockopt, getsockopt, sendto.
*
* - "unix" allows socket(AF_UNIX), listen, bind, connect, accept,
* accept4, getpeername, getsockname, setsockopt, getsockopt.
*
* - "dns" allows socket(AF_INET), sendto, recvfrom, connect.
*
* - "proc" allows fork, vfork, kill, getpriority, setpriority, prlimit,
* setrlimit, setpgid, setsid.
*
* - "thread" allows clone, futex, and permits PROT_EXEC in mprotect.
*
* - "id" allows setuid, setreuid, setresuid, setgid, setregid,
* setresgid, setgroups, prlimit, setrlimit, getpriority, setpriority,
* setfsuid, setfsgid.
*
* - "exec" allows execve, execveat, access, faccessat. On Linux this
* also weakens some security to permit running APE binaries. However
* on OpenBSD they must be assimilate beforehand. On Linux, mmap()
* will be loosened up to allow creating PROT_EXEC memory (for APE
* loader) and system call origin verification won't be activated.
*
* - "execnative" allows execve, execveat. Can only be used to run
* native executables; you won't be able to run APE binaries. mmap()
* and mprotect() are still prevented from creating executable memory.
* System call origin verification can't be enabled. If you always
* assimilate your APE binaries, then this should be preferred.
*
* @return 0 on success, or -1 w/ errno
* @raise ENOSYS if host os isn't Linux or OpenBSD
* @raise EINVAL if `execpromises` is used on Linux
*/
int pledge(const char *promises, const char *execpromises) {
int rc;
if (IsLinux()) {
rc = sys_pledge_linux(promises, execpromises);
} else {
rc = sys_pledge(promises, execpromises);
if (!rc) {
SetPromises(promises);
}
}
STRACE("pledge(%#s, %#s) → %d% m", promises, execpromises, rc);
return rc;
}