/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ │vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ ╞══════════════════════════════════════════════════════════════════════════════╡ │ Copyright 2022 Justine Alexandra Roberts Tunney │ │ │ │ Permission to use, copy, modify, and/or distribute this software for │ │ any purpose with or without fee is hereby granted, provided that the │ │ above copyright notice and this permission notice appear in all copies. │ │ │ │ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ │ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ │ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ │ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ │ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ │ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. │ ╚─────────────────────────────────────────────────────────────────────────────*/ #include "libc/calls/calls.h" #include "libc/calls/pledge.internal.h" #include "libc/calls/struct/bpf.h" #include "libc/calls/struct/filter.h" #include "libc/calls/struct/seccomp.h" #include "libc/calls/struct/sigaction.h" #include "libc/calls/syscall_support-sysv.internal.h" #include "libc/intrin/likely.h" #include "libc/intrin/promises.internal.h" #include "libc/macros.internal.h" #include "libc/nexgen32e/bsr.h" #include "libc/runtime/runtime.h" #include "libc/runtime/stack.h" #include "libc/sysv/consts/audit.h" #include "libc/sysv/consts/nrlinux.h" #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/pr.h" #include "libc/sysv/consts/prot.h" /** * @fileoverview OpenBSD pledge() Polyfill Payload for GNU/Systemd * * This file contains only the minimum amount of Linux-specific code * that's necessary to get a pledge() policy installed. This file is * designed to not use static or tls memory or libc depnedencies, so * it can be transplanted into codebases and injected into programs. */ #define Eperm 1 #define Sigabrt 6 #define Einval 22 #define Sigsys 31 #define Enosys 38 #define Sig_Setmask 2 #define Sa_Siginfo 4 #define Sa_Restorer 0x04000000 #define Sa_Restart 0x10000000 #define SPECIAL 0xf000 #define SELF 0x8000 #define ADDRLESS 0x2000 #define INET 0x2000 #define LOCK 0x4000 #define NOEXEC 0x8000 #define EXEC 0x4000 #define READONLY 0x8000 #define WRITEONLY 0x4000 #define CREATONLY 0x2000 #define STDIO 0x8000 #define THREAD 0x8000 #define TTY 0x8000 #define UNIX 0x4000 #define NOBITS 0x8000 #define RESTRICT 0x1000 #define PLEDGE(pledge) pledge, ARRAYLEN(pledge) #define OFF(f) offsetof(struct seccomp_data, f) #define AbortPledge(reason) \ do { \ asm("hlt"); \ unreachable; \ } while (0) struct Filter { size_t n; struct sock_filter p[700]; }; static const struct thatispacked SyscallName { uint16_t n; const char *const s; } kSyscallName[] = { {__NR_linux_exit, "exit"}, // {__NR_linux_exit_group, "exit_group"}, // {__NR_linux_read, "read"}, // {__NR_linux_write, "write"}, // {__NR_linux_open, "open"}, // {__NR_linux_close, "close"}, // {__NR_linux_stat, "stat"}, // {__NR_linux_fstat, "fstat"}, // {__NR_linux_lstat, "lstat"}, // {__NR_linux_poll, "poll"}, // {__NR_linux_ppoll, "ppoll"}, // {__NR_linux_brk, "brk"}, // {__NR_linux_sigreturn, "sigreturn"}, // {__NR_linux_lseek, "lseek"}, // {__NR_linux_mmap, "mmap"}, // {__NR_linux_msync, "msync"}, // {__NR_linux_mprotect, "mprotect"}, // {__NR_linux_munmap, "munmap"}, // {__NR_linux_sigaction, "sigaction"}, // {__NR_linux_sigprocmask, "sigprocmask"}, // {__NR_linux_ioctl, "ioctl"}, // {__NR_linux_pread, "pread"}, // {__NR_linux_pwrite, "pwrite"}, // {__NR_linux_readv, "readv"}, // {__NR_linux_writev, "writev"}, // {__NR_linux_access, "access"}, // {__NR_linux_pipe, "pipe"}, // {__NR_linux_select, "select"}, // {__NR_linux_pselect6, "pselect6"}, // {__NR_linux_sched_yield, "sched_yield"}, // {__NR_linux_mremap, "mremap"}, // {__NR_linux_mincore, "mincore"}, // {__NR_linux_madvise, "madvise"}, // {__NR_linux_shmget, "shmget"}, // {__NR_linux_shmat, "shmat"}, // {__NR_linux_shmctl, "shmctl"}, // {__NR_linux_dup, "dup"}, // {__NR_linux_dup2, "dup2"}, // {__NR_linux_pause, "pause"}, // {__NR_linux_nanosleep, "nanosleep"}, // {__NR_linux_getitimer, "getitimer"}, // {__NR_linux_setitimer, "setitimer"}, // {__NR_linux_alarm, "alarm"}, // {__NR_linux_getpid, "getpid"}, // {__NR_linux_sendfile, "sendfile"}, // {__NR_linux_socket, "socket"}, // {__NR_linux_connect, "connect"}, // {__NR_linux_accept, "accept"}, // {__NR_linux_sendto, "sendto"}, // {__NR_linux_recvfrom, "recvfrom"}, // {__NR_linux_sendmsg, "sendmsg"}, // {__NR_linux_recvmsg, "recvmsg"}, // {__NR_linux_shutdown, "shutdown"}, // {__NR_linux_bind, "bind"}, // {__NR_linux_listen, "listen"}, // {__NR_linux_getsockname, "getsockname"}, // {__NR_linux_getpeername, "getpeername"}, // {__NR_linux_socketpair, "socketpair"}, // {__NR_linux_setsockopt, "setsockopt"}, // {__NR_linux_getsockopt, "getsockopt"}, // {__NR_linux_fork, "fork"}, // {__NR_linux_vfork, "vfork"}, // {__NR_linux_execve, "execve"}, // {__NR_linux_wait4, "wait4"}, // {__NR_linux_kill, "kill"}, // {__NR_linux_clone, "clone"}, // {__NR_linux_tkill, "tkill"}, // {__NR_linux_futex, "futex"}, // {__NR_linux_set_robust_list, "set_robust_list"}, // {__NR_linux_get_robust_list, "get_robust_list"}, // {__NR_linux_uname, "uname"}, // {__NR_linux_semget, "semget"}, // {__NR_linux_semop, "semop"}, // {__NR_linux_semctl, "semctl"}, // {__NR_linux_shmdt, "shmdt"}, // {__NR_linux_msgget, "msgget"}, // {__NR_linux_msgsnd, "msgsnd"}, // {__NR_linux_msgrcv, "msgrcv"}, // {__NR_linux_msgctl, "msgctl"}, // {__NR_linux_fcntl, "fcntl"}, // {__NR_linux_flock, "flock"}, // {__NR_linux_fsync, "fsync"}, // {__NR_linux_fdatasync, "fdatasync"}, // {__NR_linux_truncate, "truncate"}, // {__NR_linux_ftruncate, "ftruncate"}, // {__NR_linux_getcwd, "getcwd"}, // {__NR_linux_chdir, "chdir"}, // {__NR_linux_fchdir, "fchdir"}, // {__NR_linux_rename, "rename"}, // {__NR_linux_mkdir, "mkdir"}, // {__NR_linux_rmdir, "rmdir"}, // {__NR_linux_creat, "creat"}, // {__NR_linux_link, "link"}, // {__NR_linux_unlink, "unlink"}, // {__NR_linux_symlink, "symlink"}, // {__NR_linux_readlink, "readlink"}, // {__NR_linux_chmod, "chmod"}, // {__NR_linux_fchmod, "fchmod"}, // {__NR_linux_chown, "chown"}, // {__NR_linux_fchown, "fchown"}, // {__NR_linux_lchown, "lchown"}, // {__NR_linux_umask, "umask"}, // {__NR_linux_gettimeofday, "gettimeofday"}, // {__NR_linux_getrlimit, "getrlimit"}, // {__NR_linux_getrusage, "getrusage"}, // {__NR_linux_sysinfo, "sysinfo"}, // {__NR_linux_times, "times"}, // {__NR_linux_ptrace, "ptrace"}, // {__NR_linux_syslog, "syslog"}, // {__NR_linux_getuid, "getuid"}, // {__NR_linux_getgid, "getgid"}, // {__NR_linux_getppid, "getppid"}, // {__NR_linux_getpgrp, "getpgrp"}, // {__NR_linux_setsid, "setsid"}, // {__NR_linux_getsid, "getsid"}, // {__NR_linux_getpgid, "getpgid"}, // {__NR_linux_setpgid, "setpgid"}, // {__NR_linux_geteuid, "geteuid"}, // {__NR_linux_getegid, "getegid"}, // {__NR_linux_getgroups, "getgroups"}, // {__NR_linux_setgroups, "setgroups"}, // {__NR_linux_setreuid, "setreuid"}, // {__NR_linux_setregid, "setregid"}, // {__NR_linux_setuid, "setuid"}, // {__NR_linux_setgid, "setgid"}, // {__NR_linux_setresuid, "setresuid"}, // {__NR_linux_setresgid, "setresgid"}, // {__NR_linux_getresuid, "getresuid"}, // {__NR_linux_getresgid, "getresgid"}, // {__NR_linux_sigpending, "sigpending"}, // {__NR_linux_sigsuspend, "sigsuspend"}, // {__NR_linux_sigaltstack, "sigaltstack"}, // {__NR_linux_mknod, "mknod"}, // {__NR_linux_mknodat, "mknodat"}, // {__NR_linux_statfs, "statfs"}, // {__NR_linux_fstatfs, "fstatfs"}, // {__NR_linux_getpriority, "getpriority"}, // {__NR_linux_setpriority, "setpriority"}, // {__NR_linux_mlock, "mlock"}, // {__NR_linux_munlock, "munlock"}, // {__NR_linux_mlockall, "mlockall"}, // {__NR_linux_munlockall, "munlockall"}, // {__NR_linux_setrlimit, "setrlimit"}, // {__NR_linux_chroot, "chroot"}, // {__NR_linux_sync, "sync"}, // {__NR_linux_acct, "acct"}, // {__NR_linux_settimeofday, "settimeofday"}, // {__NR_linux_mount, "mount"}, // {__NR_linux_reboot, "reboot"}, // {__NR_linux_quotactl, "quotactl"}, // {__NR_linux_setfsuid, "setfsuid"}, // {__NR_linux_setfsgid, "setfsgid"}, // {__NR_linux_capget, "capget"}, // {__NR_linux_capset, "capset"}, // {__NR_linux_sigtimedwait, "sigtimedwait"}, // {__NR_linux_rt_sigqueueinfo, "rt_sigqueueinfo"}, // {__NR_linux_personality, "personality"}, // {__NR_linux_ustat, "ustat"}, // {__NR_linux_sysfs, "sysfs"}, // {__NR_linux_sched_setparam, "sched_setparam"}, // {__NR_linux_sched_getparam, "sched_getparam"}, // {__NR_linux_sched_setscheduler, "sched_setscheduler"}, // {__NR_linux_sched_getscheduler, "sched_getscheduler"}, // {__NR_linux_sched_get_priority_max, "sched_get_priority_max"}, // {__NR_linux_sched_get_priority_min, "sched_get_priority_min"}, // {__NR_linux_sched_rr_get_interval, "sched_rr_get_interval"}, // {__NR_linux_vhangup, "vhangup"}, // {__NR_linux_modify_ldt, "modify_ldt"}, // {__NR_linux_pivot_root, "pivot_root"}, // {__NR_linux__sysctl, "_sysctl"}, // {__NR_linux_prctl, "prctl"}, // {__NR_linux_arch_prctl, "arch_prctl"}, // {__NR_linux_adjtimex, "adjtimex"}, // {__NR_linux_umount2, "umount2"}, // {__NR_linux_swapon, "swapon"}, // {__NR_linux_swapoff, "swapoff"}, // {__NR_linux_sethostname, "sethostname"}, // {__NR_linux_setdomainname, "setdomainname"}, // {__NR_linux_iopl, "iopl"}, // {__NR_linux_ioperm, "ioperm"}, // {__NR_linux_init_module, "init_module"}, // {__NR_linux_delete_module, "delete_module"}, // {__NR_linux_gettid, "gettid"}, // {__NR_linux_readahead, "readahead"}, // {__NR_linux_setxattr, "setxattr"}, // {__NR_linux_fsetxattr, "fsetxattr"}, // {__NR_linux_getxattr, "getxattr"}, // {__NR_linux_fgetxattr, "fgetxattr"}, // {__NR_linux_listxattr, "listxattr"}, // {__NR_linux_flistxattr, "flistxattr"}, // {__NR_linux_removexattr, "removexattr"}, // {__NR_linux_fremovexattr, "fremovexattr"}, // {__NR_linux_lsetxattr, "lsetxattr"}, // {__NR_linux_lgetxattr, "lgetxattr"}, // {__NR_linux_llistxattr, "llistxattr"}, // {__NR_linux_lremovexattr, "lremovexattr"}, // {__NR_linux_sched_setaffinity, "sched_setaffinity"}, // {__NR_linux_sched_getaffinity, "sched_getaffinity"}, // {__NR_linux_io_setup, "io_setup"}, // {__NR_linux_io_destroy, "io_destroy"}, // {__NR_linux_io_getevents, "io_getevents"}, // {__NR_linux_io_submit, "io_submit"}, // {__NR_linux_io_cancel, "io_cancel"}, // {__NR_linux_lookup_dcookie, "lookup_dcookie"}, // {__NR_linux_epoll_create, "epoll_create"}, // {__NR_linux_epoll_wait, "epoll_wait"}, // {__NR_linux_epoll_ctl, "epoll_ctl"}, // {__NR_linux_getdents, "getdents"}, // {__NR_linux_set_tid_address, "set_tid_address"}, // {__NR_linux_restart_syscall, "restart_syscall"}, // {__NR_linux_semtimedop, "semtimedop"}, // {__NR_linux_fadvise, "fadvise"}, // {__NR_linux_timer_create, "timer_create"}, // {__NR_linux_timer_settime, "timer_settime"}, // {__NR_linux_timer_gettime, "timer_gettime"}, // {__NR_linux_timer_getoverrun, "timer_getoverrun"}, // {__NR_linux_timer_delete, "timer_delete"}, // {__NR_linux_clock_settime, "clock_settime"}, // {__NR_linux_clock_gettime, "clock_gettime"}, // {__NR_linux_clock_getres, "clock_getres"}, // {__NR_linux_clock_nanosleep, "clock_nanosleep"}, // {__NR_linux_tgkill, "tgkill"}, // {__NR_linux_mbind, "mbind"}, // {__NR_linux_set_mempolicy, "set_mempolicy"}, // {__NR_linux_get_mempolicy, "get_mempolicy"}, // {__NR_linux_mq_open, "mq_open"}, // {__NR_linux_mq_unlink, "mq_unlink"}, // {__NR_linux_mq_timedsend, "mq_timedsend"}, // {__NR_linux_mq_timedreceive, "mq_timedreceive"}, // {__NR_linux_mq_notify, "mq_notify"}, // {__NR_linux_mq_getsetattr, "mq_getsetattr"}, // {__NR_linux_kexec_load, "kexec_load"}, // {__NR_linux_waitid, "waitid"}, // {__NR_linux_add_key, "add_key"}, // {__NR_linux_request_key, "request_key"}, // {__NR_linux_keyctl, "keyctl"}, // {__NR_linux_ioprio_set, "ioprio_set"}, // {__NR_linux_ioprio_get, "ioprio_get"}, // {__NR_linux_inotify_init, "inotify_init"}, // {__NR_linux_inotify_add_watch, "inotify_add_watch"}, // {__NR_linux_inotify_rm_watch, "inotify_rm_watch"}, // {__NR_linux_openat, "openat"}, // {__NR_linux_mkdirat, "mkdirat"}, // {__NR_linux_fchownat, "fchownat"}, // {__NR_linux_utime, "utime"}, // {__NR_linux_utimes, "utimes"}, // {__NR_linux_futimesat, "futimesat"}, // {__NR_linux_fstatat, "fstatat"}, // {__NR_linux_unlinkat, "unlinkat"}, // {__NR_linux_renameat, "renameat"}, // {__NR_linux_linkat, "linkat"}, // {__NR_linux_symlinkat, "symlinkat"}, // {__NR_linux_readlinkat, "readlinkat"}, // {__NR_linux_fchmodat, "fchmodat"}, // {__NR_linux_faccessat, "faccessat"}, // {__NR_linux_unshare, "unshare"}, // {__NR_linux_splice, "splice"}, // {__NR_linux_tee, "tee"}, // {__NR_linux_sync_file_range, "sync_file_range"}, // {__NR_linux_vmsplice, "vmsplice"}, // {__NR_linux_migrate_pages, "migrate_pages"}, // {__NR_linux_move_pages, "move_pages"}, // {__NR_linux_preadv, "preadv"}, // {__NR_linux_pwritev, "pwritev"}, // {__NR_linux_utimensat, "utimensat"}, // {__NR_linux_fallocate, "fallocate"}, // {__NR_linux_accept4, "accept4"}, // {__NR_linux_dup3, "dup3"}, // {__NR_linux_pipe2, "pipe2"}, // {__NR_linux_epoll_pwait, "epoll_pwait"}, // {__NR_linux_epoll_create1, "epoll_create1"}, // {__NR_linux_perf_event_open, "perf_event_open"}, // {__NR_linux_inotify_init1, "inotify_init1"}, // {__NR_linux_rt_tgsigqueueinfo, "rt_tgsigqueueinfo"}, // {__NR_linux_signalfd, "signalfd"}, // {__NR_linux_signalfd4, "signalfd4"}, // {__NR_linux_eventfd, "eventfd"}, // {__NR_linux_eventfd2, "eventfd2"}, // {__NR_linux_timerfd_create, "timerfd_create"}, // {__NR_linux_timerfd_settime, "timerfd_settime"}, // {__NR_linux_timerfd_gettime, "timerfd_gettime"}, // {__NR_linux_recvmmsg, "recvmmsg"}, // {__NR_linux_fanotify_init, "fanotify_init"}, // {__NR_linux_fanotify_mark, "fanotify_mark"}, // {__NR_linux_prlimit, "prlimit"}, // {__NR_linux_name_to_handle_at, "name_to_handle_at"}, // {__NR_linux_open_by_handle_at, "open_by_handle_at"}, // {__NR_linux_clock_adjtime, "clock_adjtime"}, // {__NR_linux_syncfs, "syncfs"}, // {__NR_linux_sendmmsg, "sendmmsg"}, // {__NR_linux_setns, "setns"}, // {__NR_linux_getcpu, "getcpu"}, // {__NR_linux_process_vm_readv, "process_vm_readv"}, // {__NR_linux_process_vm_writev, "process_vm_writev"}, // {__NR_linux_kcmp, "kcmp"}, // {__NR_linux_finit_module, "finit_module"}, // {__NR_linux_sched_setattr, "sched_setattr"}, // {__NR_linux_sched_getattr, "sched_getattr"}, // {__NR_linux_renameat2, "renameat2"}, // {__NR_linux_seccomp, "seccomp"}, // {__NR_linux_getrandom, "getrandom"}, // {__NR_linux_memfd_create, "memfd_create"}, // {__NR_linux_kexec_file_load, "kexec_file_load"}, // {__NR_linux_bpf, "bpf"}, // {__NR_linux_execveat, "execveat"}, // {__NR_linux_userfaultfd, "userfaultfd"}, // {__NR_linux_membarrier, "membarrier"}, // {__NR_linux_mlock2, "mlock2"}, // {__NR_linux_copy_file_range, "copy_file_range"}, // {__NR_linux_preadv2, "preadv2"}, // {__NR_linux_pwritev2, "pwritev2"}, // {__NR_linux_pkey_mprotect, "pkey_mprotect"}, // {__NR_linux_pkey_alloc, "pkey_alloc"}, // {__NR_linux_pkey_free, "pkey_free"}, // {__NR_linux_statx, "statx"}, // {__NR_linux_io_pgetevents, "io_pgetevents"}, // {__NR_linux_rseq, "rseq"}, // {__NR_linux_pidfd_send_signal, "pidfd_send_signal"}, // {__NR_linux_io_uring_setup, "io_uring_setup"}, // {__NR_linux_io_uring_enter, "io_uring_enter"}, // {__NR_linux_io_uring_register, "io_uring_register"}, // {__NR_linux_open_tree, "open_tree"}, // {__NR_linux_move_mount, "move_mount"}, // {__NR_linux_fsopen, "fsopen"}, // {__NR_linux_fsconfig, "fsconfig"}, // {__NR_linux_fsmount, "fsmount"}, // {__NR_linux_fspick, "fspick"}, // {__NR_linux_pidfd_open, "pidfd_open"}, // {__NR_linux_clone3, "clone3"}, // {__NR_linux_close_range, "close_range"}, // {__NR_linux_openat2, "openat2"}, // {__NR_linux_pidfd_getfd, "pidfd_getfd"}, // {__NR_linux_faccessat2, "faccessat2"}, // {__NR_linux_process_madvise, "process_madvise"}, // {__NR_linux_epoll_pwait2, "epoll_pwait2"}, // {__NR_linux_mount_setattr, "mount_setattr"}, // {__NR_linux_quotactl_fd, "quotactl_fd"}, // {__NR_linux_landlock_create_ruleset, "landlock_create_ruleset"}, // {__NR_linux_landlock_add_rule, "landlock_add_rule"}, // {__NR_linux_landlock_restrict_self, "landlock_restrict_self"}, // {__NR_linux_memfd_secret, "memfd_secret"}, // {__NR_linux_process_mrelease, "process_mrelease"}, // {__NR_linux_futex_waitv, "futex_waitv"}, // {__NR_linux_set_mempolicy_home_node, "set_mempolicy_home_node"}, // }; static const uint16_t kPledgeDefault[] = { __NR_linux_exit, // thread return / exit() }; // stdio contains all the benign system calls. openbsd makes the // assumption that preexisting file descriptors are trustworthy. we // implement checking for these as a simple linear scan rather than // binary search, since there doesn't appear to be any measurable // difference in the latency of sched_yield() if it's at the start of // the bpf script or the end. static const uint16_t kPledgeStdio[] = { __NR_linux_sigreturn, // __NR_linux_restart_syscall, // __NR_linux_exit_group, // __NR_linux_sched_yield, // __NR_linux_sched_getaffinity, // __NR_linux_clock_getres, // __NR_linux_clock_gettime, // __NR_linux_clock_nanosleep, // __NR_linux_close_range, // __NR_linux_close, // __NR_linux_write, // __NR_linux_writev, // __NR_linux_pwrite, // __NR_linux_pwritev, // __NR_linux_pwritev2, // __NR_linux_read, // __NR_linux_readv, // __NR_linux_pread, // __NR_linux_preadv, // __NR_linux_preadv2, // __NR_linux_dup, // __NR_linux_dup2, // __NR_linux_dup3, // __NR_linux_fchdir, // __NR_linux_fcntl | STDIO, // __NR_linux_fstat, // __NR_linux_fsync, // __NR_linux_sysinfo, // __NR_linux_fdatasync, // __NR_linux_ftruncate, // __NR_linux_getrandom, // __NR_linux_getgroups, // __NR_linux_getpgid, // __NR_linux_getpgrp, // __NR_linux_getpid, // __NR_linux_gettid, // __NR_linux_getuid, // __NR_linux_getgid, // __NR_linux_getsid, // __NR_linux_getppid, // __NR_linux_geteuid, // __NR_linux_getegid, // __NR_linux_getrlimit, // __NR_linux_getresgid, // __NR_linux_getresuid, // __NR_linux_getitimer, // __NR_linux_setitimer, // __NR_linux_timerfd_create, // __NR_linux_timerfd_settime, // __NR_linux_timerfd_gettime, // __NR_linux_copy_file_range, // __NR_linux_gettimeofday, // __NR_linux_sendfile, // __NR_linux_vmsplice, // __NR_linux_splice, // __NR_linux_lseek, // __NR_linux_tee, // __NR_linux_brk, // __NR_linux_msync, // __NR_linux_mmap | NOEXEC, // __NR_linux_mremap, // __NR_linux_munmap, // __NR_linux_mincore, // __NR_linux_madvise, // __NR_linux_fadvise, // __NR_linux_mprotect | NOEXEC, // __NR_linux_arch_prctl, // __NR_linux_migrate_pages, // __NR_linux_sync_file_range, // __NR_linux_set_tid_address, // __NR_linux_membarrier, // __NR_linux_nanosleep, // __NR_linux_pipe, // __NR_linux_pipe2, // __NR_linux_poll, // __NR_linux_ppoll, // __NR_linux_select, // __NR_linux_pselect6, // __NR_linux_epoll_create, // __NR_linux_epoll_create1, // __NR_linux_epoll_ctl, // __NR_linux_epoll_wait, // __NR_linux_epoll_pwait, // __NR_linux_epoll_pwait2, // __NR_linux_recvfrom, // __NR_linux_sendto | ADDRLESS, // __NR_linux_ioctl | RESTRICT, // __NR_linux_alarm, // __NR_linux_pause, // __NR_linux_shutdown, // __NR_linux_eventfd, // __NR_linux_eventfd2, // __NR_linux_signalfd, // __NR_linux_signalfd4, // __NR_linux_sigaction, // __NR_linux_sigaltstack, // __NR_linux_sigprocmask, // __NR_linux_sigsuspend, // __NR_linux_sigpending, // __NR_linux_kill | SELF, // __NR_linux_tkill | SELF, // __NR_linux_socketpair, // __NR_linux_getrusage, // __NR_linux_times, // __NR_linux_umask, // __NR_linux_wait4, // __NR_linux_uname, // __NR_linux_prctl | STDIO, // __NR_linux_clone | THREAD, // __NR_linux_futex, // __NR_linux_set_robust_list, // __NR_linux_get_robust_list, // __NR_linux_prlimit | STDIO, // }; static const uint16_t kPledgeFlock[] = { __NR_linux_flock, // __NR_linux_fcntl | LOCK, // }; static const uint16_t kPledgeRpath[] = { __NR_linux_chdir, // __NR_linux_getcwd, // __NR_linux_open | READONLY, // __NR_linux_openat | READONLY, // __NR_linux_stat, // __NR_linux_lstat, // __NR_linux_fstat, // __NR_linux_fstatat, // __NR_linux_access, // __NR_linux_faccessat, // __NR_linux_faccessat2, // __NR_linux_readlink, // __NR_linux_readlinkat, // __NR_linux_statfs, // __NR_linux_fstatfs, // __NR_linux_getdents, // }; static const uint16_t kPledgeWpath[] = { __NR_linux_getcwd, // __NR_linux_open | WRITEONLY, // __NR_linux_openat | WRITEONLY, // __NR_linux_stat, // __NR_linux_fstat, // __NR_linux_lstat, // __NR_linux_fstatat, // __NR_linux_access, // __NR_linux_truncate, // __NR_linux_faccessat, // __NR_linux_faccessat2, // __NR_linux_readlinkat, // __NR_linux_chmod | NOBITS, // __NR_linux_fchmod | NOBITS, // __NR_linux_fchmodat | NOBITS, // }; static const uint16_t kPledgeCpath[] = { __NR_linux_open | CREATONLY, // __NR_linux_openat | CREATONLY, // __NR_linux_creat | RESTRICT, // __NR_linux_rename, // __NR_linux_renameat, // __NR_linux_renameat2, // __NR_linux_link, // __NR_linux_linkat, // __NR_linux_symlink, // __NR_linux_symlinkat, // __NR_linux_rmdir, // __NR_linux_unlink, // __NR_linux_unlinkat, // __NR_linux_mkdir, // __NR_linux_mkdirat, // }; static const uint16_t kPledgeDpath[] = { __NR_linux_mknod, // __NR_linux_mknodat, // }; static const uint16_t kPledgeFattr[] = { __NR_linux_chmod | NOBITS, // __NR_linux_fchmod | NOBITS, // __NR_linux_fchmodat | NOBITS, // __NR_linux_utime, // __NR_linux_utimes, // __NR_linux_futimesat, // __NR_linux_utimensat, // }; static const uint16_t kPledgeInet[] = { __NR_linux_socket | INET, // __NR_linux_listen, // __NR_linux_bind, // __NR_linux_sendto, // __NR_linux_connect, // __NR_linux_accept, // __NR_linux_accept4, // __NR_linux_ioctl | INET, // __NR_linux_getsockopt | RESTRICT, // __NR_linux_setsockopt | RESTRICT, // __NR_linux_getpeername, // __NR_linux_getsockname, // }; static const uint16_t kPledgeUnix[] = { __NR_linux_socket | UNIX, // __NR_linux_listen, // __NR_linux_bind, // __NR_linux_connect, // __NR_linux_sendto, // __NR_linux_accept, // __NR_linux_accept4, // __NR_linux_getsockopt | RESTRICT, // __NR_linux_setsockopt | RESTRICT, // __NR_linux_getpeername, // __NR_linux_getsockname, // }; static const uint16_t kPledgeDns[] = { __NR_linux_socket | INET, // __NR_linux_bind, // __NR_linux_sendto, // __NR_linux_connect, // __NR_linux_recvfrom, // __NR_linux_fstatat, // __NR_linux_openat | READONLY, // __NR_linux_read, // __NR_linux_close, // }; static const uint16_t kPledgeTty[] = { __NR_linux_ioctl | TTY, // }; static const uint16_t kPledgeRecvfd[] = { __NR_linux_recvmsg, // __NR_linux_recvmmsg, // }; static const uint16_t kPledgeSendfd[] = { __NR_linux_sendmsg, // __NR_linux_sendmmsg, // }; static const uint16_t kPledgeProc[] = { __NR_linux_fork, // __NR_linux_vfork, // __NR_linux_clone | RESTRICT, // __NR_linux_kill, // __NR_linux_setsid, // __NR_linux_setpgid, // __NR_linux_prlimit, // __NR_linux_setrlimit, // __NR_linux_getpriority, // __NR_linux_setpriority, // __NR_linux_ioprio_get, // __NR_linux_ioprio_set, // __NR_linux_sched_getscheduler, // __NR_linux_sched_setscheduler, // __NR_linux_sched_get_priority_min, // __NR_linux_sched_get_priority_max, // __NR_linux_sched_getaffinity, // __NR_linux_sched_setaffinity, // __NR_linux_sched_getparam, // __NR_linux_sched_setparam, // __NR_linux_tkill, // __NR_linux_tgkill, // }; static const uint16_t kPledgeId[] = { __NR_linux_setuid, // __NR_linux_setreuid, // __NR_linux_setresuid, // __NR_linux_setgid, // __NR_linux_setregid, // __NR_linux_setresgid, // __NR_linux_setgroups, // __NR_linux_prlimit, // __NR_linux_setrlimit, // __NR_linux_getpriority, // __NR_linux_setpriority, // __NR_linux_setfsuid, // __NR_linux_setfsgid, // }; static const uint16_t kPledgeChown[] = { __NR_linux_chown, // __NR_linux_fchown, // __NR_linux_lchown, // __NR_linux_fchownat, // }; static const uint16_t kPledgeSettime[] = { __NR_linux_settimeofday, // __NR_linux_clock_adjtime, // }; static const uint16_t kPledgeProtExec[] = { __NR_linux_mmap | EXEC, // __NR_linux_mprotect, // }; static const uint16_t kPledgeExec[] = { __NR_linux_execve, // __NR_linux_execveat, // }; static const uint16_t kPledgeUnveil[] = { __NR_linux_landlock_create_ruleset, // __NR_linux_landlock_add_rule, // __NR_linux_landlock_restrict_self, // }; // placeholder group // // pledge.com checks this to do auto-unveiling static const uint16_t kPledgeVminfo[] = { __NR_linux_sched_yield, // }; // placeholder group // // pledge.com uses this to auto-unveil /tmp and $TMPPATH with rwc // permissions. pledge() alone (without unveil() too) offers very // little security here. consider using them together. static const uint16_t kPledgeTmppath[] = { __NR_linux_lstat, // __NR_linux_unlink, // __NR_linux_unlinkat, // }; const struct Pledges kPledge[PROMISE_LEN_] = { [PROMISE_STDIO] = {"stdio", PLEDGE(kPledgeStdio)}, // [PROMISE_RPATH] = {"rpath", PLEDGE(kPledgeRpath)}, // [PROMISE_WPATH] = {"wpath", PLEDGE(kPledgeWpath)}, // [PROMISE_CPATH] = {"cpath", PLEDGE(kPledgeCpath)}, // [PROMISE_DPATH] = {"dpath", PLEDGE(kPledgeDpath)}, // [PROMISE_FLOCK] = {"flock", PLEDGE(kPledgeFlock)}, // [PROMISE_FATTR] = {"fattr", PLEDGE(kPledgeFattr)}, // [PROMISE_INET] = {"inet", PLEDGE(kPledgeInet)}, // [PROMISE_UNIX] = {"unix", PLEDGE(kPledgeUnix)}, // [PROMISE_DNS] = {"dns", PLEDGE(kPledgeDns)}, // [PROMISE_TTY] = {"tty", PLEDGE(kPledgeTty)}, // [PROMISE_RECVFD] = {"recvfd", PLEDGE(kPledgeRecvfd)}, // [PROMISE_SENDFD] = {"sendfd", PLEDGE(kPledgeSendfd)}, // [PROMISE_PROC] = {"proc", PLEDGE(kPledgeProc)}, // [PROMISE_EXEC] = {"exec", PLEDGE(kPledgeExec)}, // [PROMISE_ID] = {"id", PLEDGE(kPledgeId)}, // [PROMISE_UNVEIL] = {"unveil", PLEDGE(kPledgeUnveil)}, // [PROMISE_SETTIME] = {"settime", PLEDGE(kPledgeSettime)}, // [PROMISE_PROT_EXEC] = {"prot_exec", PLEDGE(kPledgeProtExec)}, // [PROMISE_VMINFO] = {"vminfo", PLEDGE(kPledgeVminfo)}, // [PROMISE_TMPPATH] = {"tmppath", PLEDGE(kPledgeTmppath)}, // [PROMISE_CHOWN] = {"chown", PLEDGE(kPledgeChown)}, // }; static const struct sock_filter kPledgeStart[] = { // make sure this isn't an i386 binary or something BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(arch)), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS), // each filter assumes ordinal is already loaded into accumulator BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), // forbid some system calls with ENOSYS (rather than EPERM) BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, __NR_linux_memfd_secret, 5, 0), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_rseq, 4, 0), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_memfd_create, 3, 0), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_openat2, 2, 0), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_clone3, 1, 0), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_statx, 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (Enosys & SECCOMP_RET_DATA)), }; static const struct sock_filter kFilterIgnoreExitGroup[] = { BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_exit_group, 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (Eperm & SECCOMP_RET_DATA)), }; static privileged unsigned long StrLen(const char *s) { unsigned long n = 0; while (*s++) ++n; return n; } static privileged void *MemCpy(void *d, const void *s, unsigned long n) { unsigned long i = 0; for (; i < n; ++i) ((char *)d)[i] = ((char *)s)[i]; return (char *)d + n; } static privileged char *FixCpy(char p[17], uint64_t x, int k) { while (k > 0) *p++ = "0123456789abcdef"[(x >> (k -= 4)) & 15]; *p = '\0'; return p; } static privileged char *HexCpy(char p[17], uint64_t x) { return FixCpy(p, x, ROUNDUP(x ? bsrl(x) + 1 : 1, 4)); } static privileged int GetPid(void) { int ax; asm volatile("syscall" : "=a"(ax) : "0"(__NR_linux_getpid) : "rcx", "r11", "memory"); return ax; } static privileged int GetTid(void) { int ax; asm volatile("syscall" : "=a"(ax) : "0"(__NR_linux_gettid) : "rcx", "r11", "memory"); return ax; } static privileged void Log(const char *s, ...) { int ax; va_list va; va_start(va, s); do { asm volatile("syscall" : "=a"(ax) : "0"(__NR_linux_write), "D"(2), "S"(s), "d"(StrLen(s)) : "rcx", "r11", "memory"); } while ((s = va_arg(va, const char *))); va_end(va); } static privileged int Prctl(int op, long a, void *b, long c, long d) { int rc; asm volatile("mov\t%5,%%r10\n\t" "mov\t%6,%%r8\n\t" "syscall" : "=a"(rc) : "0"(__NR_linux_prctl), "D"(op), "S"(a), "d"(b), "g"(c), "g"(d) : "rcx", "r8", "r10", "r11", "memory"); return rc; } static privileged int SigAction(int sig, struct sigaction *act, struct sigaction *old) { int ax; act->sa_flags |= Sa_Restorer; act->sa_restorer = &__restore_rt; asm volatile("mov\t%5,%%r10\n\t" "syscall" : "=a"(ax) : "0"(__NR_linux_sigaction), "D"(sig), "S"(act), "d"(old), "g"(8) : "rcx", "r10", "r11", "memory"); return ax; } static privileged int SigProcMask(int how, int64_t set, int64_t *old) { int ax; asm volatile("mov\t%5,%%r10\n\t" "syscall" : "=a"(ax) : "0"(__NR_linux_sigprocmask), "D"(how), "S"(&set), "d"(old), "g"(8) : "rcx", "r10", "r11", "memory"); return ax; } static privileged void KillThisProcess(void) { int ax; SigAction(Sigabrt, &(struct sigaction){0}, 0); SigProcMask(Sig_Setmask, -1, 0); asm volatile("syscall" : "=a"(ax) : "0"(__NR_linux_kill), "D"(GetPid()), "S"(Sigabrt) : "rcx", "r11", "memory"); SigProcMask(Sig_Setmask, 0, 0); asm volatile("syscall" : "=a"(ax) : "0"(__NR_linux_exit_group), "D"(128 + Sigabrt) : "rcx", "r11", "memory"); } static privileged void KillThisThread(void) { int ax; SigAction(Sigabrt, &(struct sigaction){0}, 0); SigProcMask(Sig_Setmask, -1, 0); asm volatile("syscall" : "=a"(ax) : "0"(__NR_linux_tkill), "D"(GetTid()), "S"(Sigabrt) : "rcx", "r11", "memory"); SigProcMask(Sig_Setmask, 0, 0); asm volatile("syscall" : /* no outputs */ : "a"(__NR_linux_exit), "D"(128 + Sigabrt) : "rcx", "r11", "memory"); } static privileged const char *GetSyscallName(uint16_t n) { int i; for (i = 0; i < ARRAYLEN(kSyscallName); ++i) { if (kSyscallName[i].n == n) { return kSyscallName[i].s; } } return "unknown"; } static privileged int HasSyscall(struct Pledges *p, uint16_t n) { int i; for (i = 0; i < p->len; ++i) { if (p->syscalls[i] == n) { return 1; } if ((p->syscalls[i] & 0xfff) == n) { return 2; } } return 0; } static privileged void OnSigSys(int sig, siginfo_t *si, void *vctx) { bool found; char ord[17], rip[17]; int i, ok, mode = si->si_errno; ucontext_t *ctx = vctx; ctx->uc_mcontext.rax = -Eperm; FixCpy(ord, si->si_syscall, 12); HexCpy(rip, ctx->uc_mcontext.rip); for (found = i = 0; i < ARRAYLEN(kPledge); ++i) { if (HasSyscall(kPledge + i, si->si_syscall)) { Log("error: pledge ", kPledge[i].name, " for ", GetSyscallName(si->si_syscall), " (ord=", ord, " rip=", rip, ")\n", 0); found = true; } } if (!found) { Log("error: bad syscall (", GetSyscallName(si->si_syscall), " ord=", ord, " rip=", rip, ")\n", 0); } switch (mode & PLEDGE_PENALTY_MASK) { case PLEDGE_PENALTY_KILL_PROCESS: KillThisProcess(); // fallthrough case PLEDGE_PENALTY_KILL_THREAD: KillThisThread(); unreachable; default: break; } } static privileged void MonitorSigSys(void) { int ax; struct sigaction sa = { .sa_sigaction = OnSigSys, .sa_flags = Sa_Siginfo | Sa_Restart, }; // we block changing sigsys once pledge is installed // so we aren't terribly concerned if this will fail if (SigAction(Sigsys, &sa, 0) == -1) asm("hlt"); } static privileged void AppendFilter(struct Filter *f, struct sock_filter *p, size_t n) { if (UNLIKELY(f->n + n > ARRAYLEN(f->p))) { AbortPledge("need to increase array size"); } MemCpy(f->p + f->n, p, n * sizeof(*f->p)); f->n += n; } // The first argument of kill() must be // // - getpid() // static privileged void AllowKillSelf(struct Filter *f) { struct sock_filter fragment[] = { BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_kill, 0, 4), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, GetPid(), 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), }; AppendFilter(f, PLEDGE(fragment)); } // The first argument of tkill() must be // // - gettid() // static privileged void AllowTkillSelf(struct Filter *f) { struct sock_filter fragment[] = { BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_tkill, 0, 4), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, GetTid(), 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), }; AppendFilter(f, PLEDGE(fragment)); } // The following system calls are allowed: // // - write(2) to allow logging // - kill(getpid(), SIGABRT) to abort process // - tkill(gettid(), SIGABRT) to abort thread // - sigaction(SIGABRT) to force default signal handler // - sigreturn() to return from signal handler // - sigprocmask() to force signal delivery // static privileged void AllowMonitor(struct Filter *f) { struct sock_filter fragment[] = { BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_write, 0, 4), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_kill, 0, 6), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, GetPid(), 0, 3), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, Sigabrt, 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_tkill, 0, 6), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, GetTid(), 0, 3), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, Sigabrt, 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_sigaction, 0, 4), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, Sigabrt, 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_sigreturn, 1, 0), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_sigprocmask, 0, 1), BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), }; AppendFilter(f, PLEDGE(fragment)); } // SYSCALL is only allowed in the .privileged section // We assume program image is loaded in 32-bit spaces static privileged void AppendOriginVerification(struct Filter *f) { long x = (long)__privileged_start; long y = (long)__privileged_end; struct sock_filter fragment[] = { /*L0*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(instruction_pointer) + 4), /*L1*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 2), /*L2*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(instruction_pointer)), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, x, 0, 5 - 4), /*L4*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, y, 0, 6 - 5), /*L5*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL), /*L6*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L7*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The first argument of sys_clone_linux() must NOT have: // // - CLONE_NEWNS (0x00020000) // - CLONE_PTRACE (0x00002000) // - CLONE_UNTRACED (0x00800000) // static privileged void AllowCloneRestrict(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_clone, 0, 6 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x00822000), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L6*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The first argument of sys_clone_linux() must have: // // - CLONE_VM (0x00000100) // - CLONE_FS (0x00000200) // - CLONE_FILES (0x00000400) // - CLONE_THREAD (0x00010000) // - CLONE_SIGHAND (0x00000800) // // The first argument of sys_clone_linux() must NOT have: // // - CLONE_NEWNS (0x00020000) // - CLONE_PTRACE (0x00002000) // - CLONE_UNTRACED (0x00800000) // static privileged void AllowCloneThread(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_clone, 0, 9 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x00010f00), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x00010f00, 0, 8 - 4), /*L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), /*L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x00822000), /*L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L7*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L9*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The second argument of ioctl() must be one of: // // - FIONREAD (0x541b) // - FIONBIO (0x5421) // - FIOCLEX (0x5451) // - FIONCLEX (0x5450) // static privileged void AllowIoctlStdio(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_ioctl, 0, 8 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x541b, 3, 0), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5421, 2, 0), /*L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5451, 1, 0), /*L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5450, 0, 1), /*L6*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L8*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The second argument of ioctl() must be one of: // // - SIOCATMARK (0x8905) // static privileged void AllowIoctlInet(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_ioctl, 0, 4), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /*L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x8905, 0, 1), /*L6*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L8*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The second argument of ioctl() must be one of: // // - TCGETS (0x5401) // - TCSETS (0x5402) // - TCSETSW (0x5403) // - TCSETSF (0x5404) // - TIOCGWINSZ (0x5413) // - TIOCSPGRP (0x5410) // - TIOCGPGRP (0x540f) // - TIOCSWINSZ (0x5414) // - TCFLSH (0x540b) // - TCXONC (0x540a) // - TCSBRK (0x5409) // - TIOCSBRK (0x5427) // static privileged void AllowIoctlTty(struct Filter *f) { static const struct sock_filter fragment[] = { /* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_ioctl, 0, 16 - 1), /* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5401, 11, 0), /* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5402, 10, 0), /* L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5403, 9, 0), /* L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5404, 8, 0), /* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5413, 7, 0), /* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5410, 6, 0), /* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x540f, 5, 0), /* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5414, 4, 0), /*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x540b, 3, 0), /*L11*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x540a, 2, 0), /*L12*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5409, 1, 0), /*L13*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x5427, 0, 1), /*L14*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L15*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L16*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The level argument of setsockopt() must be one of: // // - SOL_IP (0) // - SOL_SOCKET (1) // - SOL_TCP (6) // // The optname argument of setsockopt() must be one of: // // - TCP_NODELAY (0x01) // - TCP_CORK (0x03) // - TCP_KEEPIDLE (0x04) // - TCP_KEEPINTVL (0x05) // - SO_TYPE (0x03) // - SO_ERROR (0x04) // - SO_DONTROUTE (0x05) // - SO_REUSEPORT (0x0f) // - SO_REUSEADDR (0x02) // - SO_KEEPALIVE (0x09) // - SO_RCVTIMEO (0x14) // - SO_SNDTIMEO (0x15) // - IP_RECVTTL (0x0c) // - IP_RECVERR (0x0b) // - TCP_FASTOPEN (0x17) // - TCP_FASTOPEN_CONNECT (0x1e) // static privileged void AllowSetsockoptRestrict(struct Filter *f) { static const int nr = __NR_linux_setsockopt; static const struct sock_filter fragment[] = { /* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 21 - 1), /* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 2, 0), /* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 1, 0), /* L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 6, 0, 20 - 5), /* L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0f, 13, 0), /* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x03, 12, 0), /* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0c, 11, 0), /* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x13, 10, 0), /* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x02, 9, 0), /*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x09, 8, 0), /*L11*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x14, 7, 0), /*L12*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x01, 6, 0), /*L13*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0b, 5, 0), /*L14*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x04, 4, 0), /*L15*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x05, 3, 0), /*L16*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x17, 2, 0), /*L17*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x1e, 1, 0), /*L18*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x15, 0, 1), /*L19*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L20*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L21*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The level argument of getsockopt() must be one of: // // - SOL_SOCKET (1) // - SOL_TCP (6) // // The optname argument of getsockopt() must be one of: // // - SO_TYPE (0x03) // - SO_ERROR (0x04) // - SO_REUSEPORT (0x0f) // - SO_REUSEADDR (0x02) // - SO_KEEPALIVE (0x09) // - SO_RCVTIMEO (0x14) // - SO_SNDTIMEO (0x15) // static privileged void AllowGetsockoptRestrict(struct Filter *f) { static const int nr = __NR_linux_getsockopt; static const struct sock_filter fragment[] = { /* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 14 - 1), /* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 1, 0), /* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 6, 0, 13 - 4), /* L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /* L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x03, 6, 0), /* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x04, 5, 0), /* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0f, 4, 0), /* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x02, 3, 0), /* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x09, 2, 0), /*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x14, 1, 0), /*L11*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x15, 0, 1), /*L12*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L13*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L14*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The flags parameter of mmap() must not have: // // - MAP_LOCKED (0x02000) // - MAP_NONBLOCK (0x10000) // - MAP_HUGETLB (0x40000) // static privileged void AllowMmapExec(struct Filter *f) { long y = (long)__privileged_end; static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_mmap, 0, 6 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[3])), // flags /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x52000), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 5 - 4), /*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L6*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The prot parameter of mmap() may only have: // // - PROT_NONE (0) // - PROT_READ (1) // - PROT_WRITE (2) // // The flags parameter must not have: // // - MAP_LOCKED (0x02000) // - MAP_POPULATE (0x08000) // - MAP_NONBLOCK (0x10000) // - MAP_HUGETLB (0x40000) // static privileged void AllowMmapNoexec(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_mmap, 0, 9 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), // prot /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, ~(PROT_READ | PROT_WRITE)), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 8 - 4), /*L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[3])), // flags /*L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x5a000), /*L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L7*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L9*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The prot parameter of mprotect() may only have: // // - PROT_NONE (0) // - PROT_READ (1) // - PROT_WRITE (2) // static privileged void AllowMprotectNoexec(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_mprotect, 0, 6 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), // prot /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, ~(PROT_READ | PROT_WRITE)), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L6*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The open() system call is permitted only when // // - (flags & O_ACCMODE) == O_RDONLY // // The flags parameter of open() must not have: // // - O_CREAT (000000100) // - O_TRUNC (000001000) // - __O_TMPFILE (020000000) // static privileged void AllowOpenReadonly(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_open, 0, 9 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, O_ACCMODE), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_RDONLY, 0, 8 - 4), /*L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /*L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020001100), /*L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L7*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L9*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The open() system call is permitted only when // // - (flags & O_ACCMODE) == O_RDONLY // // The flags parameter of open() must not have: // // - O_CREAT (000000100) // - O_TRUNC (000001000) // - __O_TMPFILE (020000000) // static privileged void AllowOpenatReadonly(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_openat, 0, 9 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, O_ACCMODE), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_RDONLY, 0, 8 - 4), /*L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /*L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020001100), /*L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L7*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L9*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The open() system call is permitted only when // // - (flags & O_ACCMODE) == O_WRONLY // - (flags & O_ACCMODE) == O_RDWR // // The open() flags parameter must not contain // // - O_CREAT (000000100) // - __O_TMPFILE (020000000) // static privileged void AllowOpenWriteonly(struct Filter *f) { static const struct sock_filter fragment[] = { /* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_open, 0, 10 - 1), /* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /* L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, O_ACCMODE), /* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_WRONLY, 1, 0), /* L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_RDWR, 0, 9 - 5), /* L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /* L6*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020000100), /* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /* L8*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /* L9*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L10*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The open() system call is permitted only when // // - (flags & O_ACCMODE) == O_WRONLY // - (flags & O_ACCMODE) == O_RDWR // // The openat() flags parameter must not contain // // - O_CREAT (000000100) // - __O_TMPFILE (020000000) // static privileged void AllowOpenatWriteonly(struct Filter *f) { static const struct sock_filter fragment[] = { /* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_openat, 0, 10 - 1), /* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /* L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, O_ACCMODE), /* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_WRONLY, 1, 0), /* L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, O_RDWR, 0, 9 - 5), /* L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /* L6*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020000100), /* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /* L8*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /* L9*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L10*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // If the flags parameter of open() has one of: // // - O_CREAT (000000100) // - __O_TMPFILE (020000000) // // Then the mode parameter must not have: // // - S_ISVTX (01000 sticky) // - S_ISGID (02000 setgid) // - S_ISUID (04000 setuid) // static privileged void AllowOpenCreatonly(struct Filter *f) { static const struct sock_filter fragment[] = { /* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_open, 0, 12 - 1), /* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /* L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 000000100), /* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 000000100, 7 - 4, 0), /* L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /* L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020200000), /* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 020200000, 0, 10 - 7), /* L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /* L8*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000), /* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L10*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L11*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L12*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // If the flags parameter of openat() has one of: // // - O_CREAT (000000100) // - __O_TMPFILE (020000000) // // Then the mode parameter must not have: // // - S_ISVTX (01000 sticky) // - S_ISGID (02000 setgid) // - S_ISUID (04000 setuid) // static privileged void AllowOpenatCreatonly(struct Filter *f) { static const struct sock_filter fragment[] = { /* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_openat, 0, 12 - 1), /* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /* L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 000000100), /* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 000000100, 7 - 4, 0), /* L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /* L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 020200000), /* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 020200000, 0, 10 - 7), /* L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[3])), /* L8*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000), /* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L10*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L11*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L12*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // Then the mode parameter must not have: // // - S_ISVTX (01000 sticky) // - S_ISGID (02000 setgid) // - S_ISUID (04000 setuid) // static privileged void AllowCreatRestrict(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_creat, 0, 6 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L6*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The second argument of fcntl() must be one of: // // - F_DUPFD (0) // - F_DUPFD_CLOEXEC (1030) // - F_GETFD (1) // - F_SETFD (2) // - F_GETFL (3) // - F_SETFL (4) // static privileged void AllowFcntlStdio(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_fcntl, 0, 6 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1030, 4 - 3, 0), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 5, 5 - 4, 0), /*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L6*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The second argument of fcntl() must be one of: // // - F_GETLK (5) // - F_SETLK (6) // - F_SETLKW (7) // static privileged void AllowFcntlLock(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_fcntl, 0, 6 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /*L2*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 5, 0, 5 - 3), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, 8, 5 - 4, 0), /*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L6*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The addr parameter of sendto() must be // // - NULL // static privileged void AllowSendtoAddrless(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_sendto, 0, 7 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[4]) + 0), /*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 6 - 3), /*L3*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[4]) + 4), /*L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 6 - 5), /*L5*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L6*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L7*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The family parameter of socket() must be one of: // // - AF_INET (0x02) // - AF_INET6 (0x0a) // // The type parameter of socket() will ignore: // // - SOCK_CLOEXEC (0x80000) // - SOCK_NONBLOCK (0x00800) // // The type parameter of socket() must be one of: // // - SOCK_STREAM (0x01) // - SOCK_DGRAM (0x02) // // The protocol parameter of socket() must be one of: // // - 0 // - IPPROTO_ICMP (0x01) // - IPPROTO_TCP (0x06) // - IPPROTO_UDP (0x11) // static privileged void AllowSocketInet(struct Filter *f) { static const struct sock_filter fragment[] = { /* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_socket, 0, 15 - 1), /* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), /* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x02, 1, 0), /* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x0a, 0, 14 - 4), /* L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /* L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, ~0x80800), /* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x01, 1, 0), /* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x02, 0, 14 - 8), /* L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x00, 3, 0), /*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x01, 2, 0), /*L11*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x06, 1, 0), /*L12*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x11, 0, 1), /*L13*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L14*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L15*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The family parameter of socket() must be one of: // // - AF_UNIX (1) // - AF_LOCAL (1) // // The type parameter of socket() will ignore: // // - SOCK_CLOEXEC (0x80000) // - SOCK_NONBLOCK (0x00800) // // The type parameter of socket() must be one of: // // - SOCK_STREAM (1) // - SOCK_DGRAM (2) // // The protocol parameter of socket() must be one of: // // - 0 // static privileged void AllowSocketUnix(struct Filter *f) { static const struct sock_filter fragment[] = { /* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_socket, 0, 11 - 1), /* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), /* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, 10 - 3), /* L3*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /* L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, ~0x80800), /* L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 1, 0), /* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 0, 10 - 7), /* L7*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /* L9*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L10*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L11*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The first parameter of prctl() can be any of // // - PR_SET_NAME (15) // - PR_GET_NAME (16) // - PR_GET_SECCOMP (21) // - PR_SET_SECCOMP (22) // - PR_SET_NO_NEW_PRIVS (38) // - PR_CAPBSET_READ (23) // - PR_CAPBSET_DROP (24) // static privileged void AllowPrctlStdio(struct Filter *f) { static const struct sock_filter fragment[] = { /* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_prctl, 0, 11 - 1), /* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), /* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 15, 6, 0), /* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 16, 5, 0), /* L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 21, 4, 0), /* L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 3, 0), /* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 23, 2, 0), /* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 24, 1, 0), /* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 38, 0, 1), /* L9*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L10*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L11*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The mode parameter of chmod() can't have the following: // // - S_ISVTX (01000 sticky) // - S_ISGID (02000 setgid) // - S_ISUID (04000 setuid) // static privileged void AllowChmodNobits(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_chmod, 0, 6 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L6*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The mode parameter of fchmod() can't have the following: // // - S_ISVTX (01000 sticky) // - S_ISGID (02000 setgid) // - S_ISUID (04000 setuid) // static privileged void AllowFchmodNobits(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_fchmod, 0, 6 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L6*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The mode parameter of fchmodat() can't have the following: // // - S_ISVTX (01000 sticky) // - S_ISGID (02000 setgid) // - S_ISUID (04000 setuid) // static privileged void AllowFchmodatNobits(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_fchmodat, 0, 6 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 07000), /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L6*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } // The new_limit parameter of prlimit() must be // // - NULL (0) // static privileged void AllowPrlimitStdio(struct Filter *f) { static const struct sock_filter fragment[] = { /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_prlimit, 0, 7 - 1), /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), /*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 6 - 3), /*L3*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2]) + 4), /*L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), /*L5*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), /*L6*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), /*L7*/ /* next filter */ }; AppendFilter(f, PLEDGE(fragment)); } static privileged int CountUnspecial(const uint16_t *p, size_t len) { int i, count; for (count = i = 0; i < len; ++i) { if (!(p[i] & SPECIAL)) { ++count; } } return count; } static privileged void AppendPledge(struct Filter *f, // const uint16_t *p, // size_t len) { // int i, j, count; // handle ordinals which allow syscalls regardless of args // we put in extra effort here to reduce num of bpf instrs if ((count = CountUnspecial(p, len))) { if (count < 256) { for (j = i = 0; i < len; ++i) { if (p[i] & SPECIAL) continue; // jump to ALLOW rule below if accumulator equals ordinal struct sock_filter fragment[] = { BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, // instruction p[i], // operand count - j - 1, // jump if true displacement j == count - 1), // jump if false displacement }; AppendFilter(f, PLEDGE(fragment)); ++j; } struct sock_filter fragment[] = { BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), }; AppendFilter(f, PLEDGE(fragment)); } else { AbortPledge("list of ordinals exceeds max displacement"); } } // handle "special" ordinals which use hand-crafted bpf for (i = 0; i < len; ++i) { if (!(p[i] & SPECIAL)) continue; switch (p[i]) { case __NR_linux_mmap | EXEC: AllowMmapExec(f); break; case __NR_linux_mmap | NOEXEC: AllowMmapNoexec(f); break; case __NR_linux_mprotect | NOEXEC: AllowMprotectNoexec(f); break; case __NR_linux_chmod | NOBITS: AllowChmodNobits(f); break; case __NR_linux_fchmod | NOBITS: AllowFchmodNobits(f); break; case __NR_linux_fchmodat | NOBITS: AllowFchmodatNobits(f); break; case __NR_linux_prctl | STDIO: AllowPrctlStdio(f); break; case __NR_linux_open | CREATONLY: AllowOpenCreatonly(f); break; case __NR_linux_openat | CREATONLY: AllowOpenatCreatonly(f); break; case __NR_linux_open | READONLY: AllowOpenReadonly(f); break; case __NR_linux_openat | READONLY: AllowOpenatReadonly(f); break; case __NR_linux_open | WRITEONLY: AllowOpenWriteonly(f); break; case __NR_linux_openat | WRITEONLY: AllowOpenatWriteonly(f); break; case __NR_linux_setsockopt | RESTRICT: AllowSetsockoptRestrict(f); break; case __NR_linux_getsockopt | RESTRICT: AllowGetsockoptRestrict(f); break; case __NR_linux_creat | RESTRICT: AllowCreatRestrict(f); break; case __NR_linux_fcntl | STDIO: AllowFcntlStdio(f); break; case __NR_linux_fcntl | LOCK: AllowFcntlLock(f); break; case __NR_linux_ioctl | RESTRICT: AllowIoctlStdio(f); break; case __NR_linux_ioctl | TTY: AllowIoctlTty(f); break; case __NR_linux_ioctl | INET: AllowIoctlInet(f); break; case __NR_linux_socket | INET: AllowSocketInet(f); break; case __NR_linux_socket | UNIX: AllowSocketUnix(f); break; case __NR_linux_sendto | ADDRLESS: AllowSendtoAddrless(f); break; case __NR_linux_clone | RESTRICT: AllowCloneRestrict(f); break; case __NR_linux_clone | THREAD: AllowCloneThread(f); break; case __NR_linux_prlimit | STDIO: AllowPrlimitStdio(f); break; case __NR_linux_kill | SELF: AllowKillSelf(f); break; case __NR_linux_tkill | SELF: AllowTkillSelf(f); break; default: AbortPledge("switch forgot to define a special ordinal"); } } } /** * Installs SECCOMP BPF filter on Linux thread. * * @param ipromises is inverted integer bitmask of pledge() promises * @return 0 on success, or negative error number on error * @asyncsignalsafe * @threadsafe * @vforksafe */ privileged int sys_pledge_linux(unsigned long ipromises, int mode) { struct Filter f; int i, e, rc = -1; struct sock_filter sf[1] = {BPF_STMT(BPF_RET | BPF_K, 0)}; CheckLargeStackAllocation(&f, sizeof(f)); f.n = 0; // set up the seccomp filter AppendFilter(&f, PLEDGE(kPledgeStart)); if (ipromises == -1) { // if we're pledging empty string, then avoid triggering a sigsys // when _Exit() gets called since we need to fallback to _Exit1() AppendFilter(&f, PLEDGE(kFilterIgnoreExitGroup)); } AppendPledge(&f, PLEDGE(kPledgeDefault)); for (i = 0; i < ARRAYLEN(kPledge); ++i) { if (~ipromises & (1ul << i)) { if (kPledge[i].len) { AppendPledge(&f, kPledge[i].syscalls, kPledge[i].len); } else { AbortPledge("bad ipromises"); } } } // now determine what we'll do on sandbox violations if (mode & PLEDGE_STDERR_LOGGING) { // trapping mode // // if we haven't pledged exec, then we can monitor SIGSYS // and print a helpful error message when things do break // to avoid tls / static memory, we embed mode within bpf MonitorSigSys(); AllowMonitor(&f); sf[0].k = SECCOMP_RET_TRAP | (mode & SECCOMP_RET_DATA); AppendFilter(&f, PLEDGE(sf)); } else { // non-trapping mode // // our sigsys error message handler can't be inherited across // execve() boundaries so if you've pledged exec then that'll // likely cause a SIGSYS in your child after the exec happens switch (mode & PLEDGE_PENALTY_MASK) { case PLEDGE_PENALTY_KILL_THREAD: sf[0].k = SECCOMP_RET_KILL_THREAD; break; case PLEDGE_PENALTY_KILL_PROCESS: sf[0].k = SECCOMP_RET_KILL_PROCESS; break; case PLEDGE_PENALTY_RETURN_EPERM: sf[0].k = SECCOMP_RET_ERRNO | Eperm; break; default: unreachable; } AppendFilter(&f, PLEDGE(sf)); } // drop privileges // // PR_SET_SECCOMP (Linux 2.6.23+) will refuse to work if // PR_SET_NO_NEW_PRIVS (Linux 3.5+) wasn't called so we punt the error // detection to the seccomp system call below. Prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); // register our seccomp filter with the kernel struct sock_fprog sandbox = {.len = f.n, .filter = f.p}; rc = Prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &sandbox, 0, 0); // the EINVAL error could mean a lot of things. it could mean the bpf // code is broken. it could also mean we're running on RHEL5 which // doesn't have SECCOMP support. since we don't consider lack of // system support for security to be an error, we distinguish these // two cases by running a simpler SECCOMP operation. if (rc == -Einval && Prctl(PR_GET_SECCOMP, 0, 0, 0, 0) == -Einval) { rc = 0; // -Enosys } return rc; }