2022-04-28 16:42:36 +00:00
|
|
|
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
|
|
|
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
|
|
|
|
╞══════════════════════════════════════════════════════════════════════════════╡
|
|
|
|
│ Copyright 2022 Justine Alexandra Roberts Tunney │
|
|
|
|
│ │
|
|
|
|
│ Permission to use, copy, modify, and/or distribute this software for │
|
|
|
|
│ any purpose with or without fee is hereby granted, provided that the │
|
|
|
|
│ above copyright notice and this permission notice appear in all copies. │
|
|
|
|
│ │
|
|
|
|
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
|
|
|
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
|
|
|
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
|
|
|
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
|
|
|
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
|
|
|
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
|
|
|
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
|
|
|
│ PERFORMANCE OF THIS SOFTWARE. │
|
|
|
|
╚─────────────────────────────────────────────────────────────────────────────*/
|
|
|
|
#include "libc/calls/calls.h"
|
2022-08-08 18:41:08 +00:00
|
|
|
#include "libc/calls/pledge.internal.h"
|
|
|
|
#include "libc/calls/state.internal.h"
|
2022-05-23 22:06:11 +00:00
|
|
|
#include "libc/calls/syscall-sysv.internal.h"
|
2022-08-08 18:41:08 +00:00
|
|
|
#include "libc/dce.h"
|
|
|
|
#include "libc/errno.h"
|
2022-07-16 01:07:34 +00:00
|
|
|
#include "libc/intrin/promises.internal.h"
|
2022-10-03 22:05:33 +00:00
|
|
|
#include "libc/intrin/strace.internal.h"
|
2022-11-11 05:52:47 +00:00
|
|
|
#include "libc/nexgen32e/vendor.internal.h"
|
2022-06-27 20:01:58 +00:00
|
|
|
#include "libc/runtime/runtime.h"
|
2022-04-28 16:42:36 +00:00
|
|
|
#include "libc/sysv/errfuns.h"
|
2022-06-27 20:01:58 +00:00
|
|
|
|
2022-04-28 16:42:36 +00:00
|
|
|
/**
|
2022-08-10 19:56:45 +00:00
|
|
|
* Permits system operations, e.g.
|
2022-06-27 20:01:58 +00:00
|
|
|
*
|
2022-08-16 02:52:00 +00:00
|
|
|
* __pledge_mode = PLEDGE_PENALTY_KILL_PROCESS | PLEDGE_STDERR_LOGGING;
|
2022-08-10 19:56:45 +00:00
|
|
|
* if (pledge("stdio rfile tty", 0)) {
|
|
|
|
* perror("pledge");
|
|
|
|
* exit(1);
|
|
|
|
* }
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-07-08 13:29:24 +00:00
|
|
|
* Pledging causes most system calls to become unavailable. Your system
|
2022-08-08 18:41:08 +00:00
|
|
|
* call policy is enforced by the kernel (which means it can propagate
|
|
|
|
* across execve() if permitted). Root access is not required. Support
|
2022-08-10 19:56:45 +00:00
|
|
|
* is limited to Linux 2.6.23+ (c. RHEL6) and OpenBSD. If your kernel
|
|
|
|
* isn't supported, then pledge() will return 0 and do nothing rather
|
|
|
|
* than raising ENOSYS. We don't consider lack of system support to be
|
|
|
|
* an error, because the specified operations will be permitted.
|
2022-08-08 18:41:08 +00:00
|
|
|
*
|
|
|
|
* The promises you give pledge() define which system calls are allowed.
|
2022-08-11 18:27:25 +00:00
|
|
|
* Error messages are logged when sandbox violations occur, but how that
|
|
|
|
* happens depends on the `__pledge_mode` setting (see below).
|
2022-08-08 18:41:08 +00:00
|
|
|
*
|
|
|
|
* Timing is everything with pledge. It's designed to be a voluntary
|
|
|
|
* self-imposed security model. That works best when programs perform
|
|
|
|
* permission-hungry operations (e.g. calling GetSymbolTable) towards
|
|
|
|
* the beginning of execution, and then relinquish privilege afterwards
|
|
|
|
* by calling pledge(). Here's an example of where that matters. Your
|
|
|
|
* Cosmopolitan C Library needs to code morph your executable in memory
|
|
|
|
* once you start using threads. But that's only possible to do if you
|
|
|
|
* used the `prot_exec` promise. So the right thing to do here, is to
|
|
|
|
* call __enable_threads() before calling pledge() to force it early.
|
2022-07-25 02:40:32 +00:00
|
|
|
*
|
|
|
|
* __enable_threads();
|
|
|
|
* ShowCrashReports();
|
|
|
|
* pledge("...", 0);
|
|
|
|
*
|
2022-07-16 03:47:20 +00:00
|
|
|
* By default exit() is allowed. This is useful for processes that
|
|
|
|
* perform pure computation and interface with the parent via shared
|
|
|
|
* memory. On Linux we mean sys_exit (_Exit1), not sys_exit_group
|
|
|
|
* (_Exit). The difference is effectively meaningless, since _Exit()
|
|
|
|
* will attempt both. All it means is that, if you're using threads,
|
|
|
|
* then a `pledge("", 0)` thread can't kill all your threads unless you
|
|
|
|
* `pledge("stdio", 0)`.
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-06-27 20:01:58 +00:00
|
|
|
* Once pledge is in effect, the chmod functions (if allowed) will not
|
|
|
|
* permit the sticky/setuid/setgid bits to change. Linux will EPERM here
|
|
|
|
* and OpenBSD should ignore those three bits rather than crashing.
|
|
|
|
*
|
2022-07-08 13:29:24 +00:00
|
|
|
* User and group IDs can't be changed once pledge is in effect. OpenBSD
|
|
|
|
* should ignore chown without crashing; whereas Linux will just EPERM.
|
2022-06-27 20:01:58 +00:00
|
|
|
*
|
2022-07-08 13:29:24 +00:00
|
|
|
* Using pledge is irreversible. On Linux it causes PR_SET_NO_NEW_PRIVS
|
|
|
|
* to be set on your process; however, if "id" or "recvfd" are allowed
|
|
|
|
* then they theoretically could permit the gaining of some new
|
|
|
|
* privileges. You may call pledge() multiple times if "stdio" is
|
|
|
|
* allowed. In that case, the process can only move towards a more
|
|
|
|
* restrictive state.
|
|
|
|
*
|
2022-08-08 18:41:08 +00:00
|
|
|
* pledge() can't filter filesystem paths. See unveil() which lets you
|
|
|
|
* do that. pledge() also can't do address firewalling. For example if
|
|
|
|
* you use the `inet` promise then your process will be able to talk to
|
|
|
|
* *every* internet address including public ones.
|
2022-07-08 13:29:24 +00:00
|
|
|
*
|
2022-04-28 16:42:36 +00:00
|
|
|
* `promises` is a string that may include any of the following groups
|
|
|
|
* delimited by spaces.
|
|
|
|
*
|
2022-07-14 11:32:33 +00:00
|
|
|
* - "stdio" allows exit, close, dup, dup2, dup3, fchdir, fstat, fsync,
|
2022-07-08 13:29:24 +00:00
|
|
|
* fdatasync, ftruncate, getdents, getegid, getrandom, geteuid,
|
2022-07-20 04:18:33 +00:00
|
|
|
* getgid, getgroups, times, getrusage, getitimer, getpgid, getpgrp,
|
|
|
|
* getpid, getppid, getresgid, getresuid, getrlimit, getsid, wait4,
|
|
|
|
* gettimeofday, getuid, lseek, madvise, brk, arch_prctl, uname,
|
2022-07-22 20:44:00 +00:00
|
|
|
* set_tid_address, clock_getres, clock_gettime, clock_nanosleep,
|
|
|
|
* mremap, mmap, (PROT_EXEC and weird flags aren't allowed), mprotect
|
|
|
|
* (PROT_EXEC isn't allowed), msync, sync_file_range, migrate_pages,
|
|
|
|
* munmap, nanosleep, pipe, pipe2, read, readv, pread, recv, poll,
|
|
|
|
* recvfrom, preadv, write, writev, pwrite, pwritev, select, pselect6,
|
2022-07-20 04:18:33 +00:00
|
|
|
* copy_file_range, sendfile, tee, splice, vmsplice, alarm, pause,
|
|
|
|
* send, sendto (only if addr is null), setitimer, shutdown, sigaction
|
|
|
|
* (but SIGSYS is forbidden), sigaltstack, sigprocmask, sigreturn,
|
|
|
|
* sigsuspend, umask, mincore, socketpair, ioctl(FIONREAD),
|
2022-07-19 09:54:10 +00:00
|
|
|
* ioctl(FIONBIO), ioctl(FIOCLEX), ioctl(FIONCLEX), fcntl(F_GETFD),
|
|
|
|
* fcntl(F_SETFD), fcntl(F_GETFL), fcntl(F_SETFL), sched_yield,
|
|
|
|
* epoll_create, epoll_create1, epoll_ctl, epoll_wait, epoll_pwait,
|
2022-07-20 04:18:33 +00:00
|
|
|
* epoll_pwait2, clone(CLONE_THREAD), futex, set_robust_list,
|
2022-10-03 22:05:33 +00:00
|
|
|
* get_robust_list, setaffinity, sigpending.
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-06-27 20:01:58 +00:00
|
|
|
* - "rpath" (read-only path ops) allows chdir, getcwd, open(O_RDONLY),
|
2022-07-20 04:18:33 +00:00
|
|
|
* openat(O_RDONLY), stat, fstat, lstat, fstatat, access, faccessat,
|
|
|
|
* faccessat2, readlink, readlinkat, statfs, fstatfs.
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-07-13 10:08:16 +00:00
|
|
|
* - "wpath" (write path ops) allows getcwd, open(O_WRONLY),
|
2022-07-20 04:18:33 +00:00
|
|
|
* openat(O_WRONLY), stat, fstat, lstat, fstatat, access, faccessat,
|
|
|
|
* faccessat2, readlink, readlinkat, chmod, fchmod, fchmodat.
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-07-13 10:08:16 +00:00
|
|
|
* - "cpath" (create path ops) allows open(O_CREAT), openat(O_CREAT),
|
|
|
|
* rename, renameat, renameat2, link, linkat, symlink, symlinkat,
|
|
|
|
* unlink, rmdir, unlinkat, mkdir, mkdirat.
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-07-08 13:29:24 +00:00
|
|
|
* - "dpath" (create special path ops) allows mknod, mknodat, mkfifo.
|
|
|
|
*
|
2022-06-27 20:01:58 +00:00
|
|
|
* - "flock" allows flock, fcntl(F_GETLK), fcntl(F_SETLK),
|
|
|
|
* fcntl(F_SETLKW).
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-06-27 20:01:58 +00:00
|
|
|
* - "tty" allows ioctl(TIOCGWINSZ), ioctl(TCGETS), ioctl(TCSETS),
|
|
|
|
* ioctl(TCSETSW), ioctl(TCSETSF).
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-07-20 04:18:33 +00:00
|
|
|
* - "recvfd" allows recvmsg and recvmmsg.
|
2022-07-18 14:23:15 +00:00
|
|
|
*
|
2022-07-20 04:18:33 +00:00
|
|
|
* - "sendfd" allows sendmsg and sendmmsg.
|
2022-07-08 13:29:24 +00:00
|
|
|
*
|
2022-06-27 20:01:58 +00:00
|
|
|
* - "fattr" allows chmod, fchmod, fchmodat, utime, utimes, futimens,
|
|
|
|
* utimensat.
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-06-27 20:01:58 +00:00
|
|
|
* - "inet" allows socket(AF_INET), listen, bind, connect, accept,
|
|
|
|
* accept4, getpeername, getsockname, setsockopt, getsockopt, sendto.
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-06-27 20:01:58 +00:00
|
|
|
* - "unix" allows socket(AF_UNIX), listen, bind, connect, accept,
|
|
|
|
* accept4, getpeername, getsockname, setsockopt, getsockopt.
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-06-27 20:01:58 +00:00
|
|
|
* - "dns" allows socket(AF_INET), sendto, recvfrom, connect.
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-07-20 04:18:33 +00:00
|
|
|
* - "proc" allows fork, vfork, clone, kill, tgkill, getpriority,
|
|
|
|
* setpriority, prlimit, setrlimit, setpgid, setsid.
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
|
|
|
* - "id" allows setuid, setreuid, setresuid, setgid, setregid,
|
2022-07-13 10:08:16 +00:00
|
|
|
* setresgid, setgroups, prlimit, setrlimit, getpriority, setpriority,
|
|
|
|
* setfsuid, setfsgid.
|
2022-06-27 20:01:58 +00:00
|
|
|
*
|
2022-07-20 04:18:33 +00:00
|
|
|
* - "settime" allows settimeofday and clock_adjtime.
|
|
|
|
*
|
2022-08-11 18:27:25 +00:00
|
|
|
* - "exec" allows execve, execveat. Note that `exec` alone might not be
|
|
|
|
* enough by itself to let your executable be executed. For dynamic,
|
|
|
|
* interpreted, and ape binaries, you'll usually want `rpath` and
|
|
|
|
* `prot_exec` too. With APE it's possible to work around this
|
|
|
|
* requirement, by "assimilating" your binaries beforehand. See the
|
|
|
|
* assimilate.com program and `--assimilate` flag which can be used to
|
|
|
|
* turn APE binaries into static native binaries.
|
2022-04-28 16:42:36 +00:00
|
|
|
*
|
2022-07-20 04:18:33 +00:00
|
|
|
* - "prot_exec" allows mmap(PROT_EXEC) and mprotect(PROT_EXEC). This is
|
|
|
|
* needed to (1) code morph mutexes in __enable_threads(), and it's
|
|
|
|
* needed to (2) launch non-static or non-native executables, e.g.
|
|
|
|
* non-assimilated APE binaries, or dynamic-linked executables.
|
2022-07-13 10:08:16 +00:00
|
|
|
*
|
2022-07-18 09:11:06 +00:00
|
|
|
* - "unveil" allows unveil() to be called, as well as the underlying
|
|
|
|
* landlock_create_ruleset, landlock_add_rule, landlock_restrict_self
|
|
|
|
* calls on Linux.
|
|
|
|
*
|
2022-07-20 04:18:33 +00:00
|
|
|
* - "vminfo" OpenBSD defines this for programs like `top`. On Linux,
|
|
|
|
* this is a placeholder group that lets tools like pledge.com check
|
|
|
|
* `__promises` and automatically unveil() a subset of files top would
|
|
|
|
* need, e.g. /proc/stat, /proc/meminfo.
|
|
|
|
*
|
2022-07-22 20:44:00 +00:00
|
|
|
* - "tmppath" allows unlink, unlinkat, and lstat. This is mostly a
|
|
|
|
* placeholder group for pledge.com, which reads the `__promises`
|
|
|
|
* global to determine if /tmp and $TMPPATH should be unveiled.
|
|
|
|
*
|
2022-07-23 00:07:25 +00:00
|
|
|
* `execpromises` only matters if "exec" is specified in `promises`. In
|
|
|
|
* that case, this specifies the promises that'll apply once execve()
|
|
|
|
* happens. If this is NULL then the default is used, which is
|
|
|
|
* unrestricted. OpenBSD allows child processes to escape the sandbox
|
|
|
|
* (so a pledged OpenSSH server process can do things like spawn a root
|
|
|
|
* shell). Linux however requires monotonically decreasing privileges.
|
|
|
|
* This function will perform some validation on Linux to make sure
|
|
|
|
* that `execpromises` is a subset of `promises`. Your libc wrapper for
|
|
|
|
* execve() will then apply its SECCOMP BPF filter later. Since Linux
|
|
|
|
* has to do this before calling sys_execve(), the executed process will
|
|
|
|
* be weakened to have execute permissions too.
|
2022-07-18 14:23:15 +00:00
|
|
|
*
|
2022-08-11 18:27:25 +00:00
|
|
|
* `__pledge_mode` is available to improve the experience of pledge() on
|
|
|
|
* Linux. It should specify one of the following penalties:
|
|
|
|
*
|
|
|
|
* - `PLEDGE_PENALTY_KILL_THREAD` causes the violating thread to be
|
|
|
|
* killed. This is the default on Linux. It's effectively the same as
|
|
|
|
* killing the process, since redbean has no threads. The termination
|
|
|
|
* signal can't be caught and will be either `SIGSYS` or `SIGABRT`.
|
|
|
|
* Consider enabling stderr logging below so you'll know why your
|
|
|
|
* program failed. Otherwise check the system log.
|
|
|
|
*
|
|
|
|
* - `PLEDGE_PENALTY_KILL_PROCESS` causes the process and all its
|
|
|
|
* threads to be killed. This is always the case on OpenBSD.
|
|
|
|
*
|
|
|
|
* - `PLEDGE_PENALTY_RETURN_EPERM` causes system calls to just return an
|
|
|
|
* `EPERM` error instead of killing. This is a gentler solution that
|
|
|
|
* allows code to display a friendly warning. Please note this may
|
|
|
|
* lead to weird behaviors if the software being sandboxed is lazy
|
|
|
|
* about checking error results.
|
|
|
|
*
|
|
|
|
* `__pledge_mode` may optionally bitwise or the following flags:
|
|
|
|
*
|
|
|
|
* - `PLEDGE_STDERR_LOGGING` enables friendly error message logging
|
|
|
|
* letting you know which promises are needed whenever violations
|
|
|
|
* occur. Without this, violations will be logged to `dmesg` on Linux
|
|
|
|
* if the penalty is to kill the process. You would then need to
|
|
|
|
* manually look up the system call number and then cross reference it
|
|
|
|
* with the cosmopolitan libc pledge() documentation. You can also use
|
|
|
|
* `strace -ff` which is easier. This is ignored on OpenBSD, which
|
|
|
|
* already has a good system log. Turning on stderr logging (which
|
|
|
|
* uses SECCOMP trapping) also means that the `WTERMSIG()` on your
|
|
|
|
* killed processes will always be `SIGABRT` on both Linux and
|
|
|
|
* OpenBSD. Otherwise, Linux prefers to raise `SIGSYS`. Enabling this
|
|
|
|
* option might not be a good idea if you're pledging `exec` because
|
|
|
|
* subprocesses can't inherit the `SIGSYS` handler this installs.
|
|
|
|
*
|
2022-06-27 20:01:58 +00:00
|
|
|
* @return 0 on success, or -1 w/ errno
|
2022-07-18 14:23:15 +00:00
|
|
|
* @raise EINVAL if `execpromises` on Linux isn't a subset of `promises`
|
2022-07-20 04:18:33 +00:00
|
|
|
* @raise EINVAL if `promises` allows exec and `execpromises` is null
|
2022-08-07 23:18:33 +00:00
|
|
|
* @threadsafe
|
2022-08-08 18:41:08 +00:00
|
|
|
* @vforksafe
|
2022-04-28 16:42:36 +00:00
|
|
|
*/
|
|
|
|
int pledge(const char *promises, const char *execpromises) {
|
2022-08-10 19:56:45 +00:00
|
|
|
int e, rc;
|
2022-07-18 14:23:15 +00:00
|
|
|
unsigned long ipromises, iexecpromises;
|
2023-01-18 08:56:09 +00:00
|
|
|
if (IsGenuineBlink()) {
|
2022-11-11 05:52:47 +00:00
|
|
|
rc = 0; // blink doesn't support seccomp
|
|
|
|
} else if (!ParsePromises(promises, &ipromises) &&
|
|
|
|
!ParsePromises(execpromises, &iexecpromises)) {
|
2022-07-18 14:23:15 +00:00
|
|
|
if (IsLinux()) {
|
|
|
|
// copy exec and execnative from promises to execpromises
|
2022-07-20 04:18:33 +00:00
|
|
|
iexecpromises = ~(~iexecpromises | (~ipromises & (1ul << PROMISE_EXEC)));
|
2022-07-18 14:23:15 +00:00
|
|
|
// if bits are missing in execpromises that exist in promises
|
|
|
|
// then execpromises wouldn't be a monotonic access reduction
|
|
|
|
// this check only matters when exec / execnative are allowed
|
|
|
|
if ((ipromises & ~iexecpromises) &&
|
2022-07-20 04:18:33 +00:00
|
|
|
(~ipromises & (1ul << PROMISE_EXEC))) {
|
2022-07-18 14:23:15 +00:00
|
|
|
STRACE("execpromises must be a subset of promises");
|
|
|
|
rc = einval();
|
|
|
|
} else {
|
2022-08-11 18:27:25 +00:00
|
|
|
rc = sys_pledge_linux(ipromises, __pledge_mode);
|
2022-08-08 18:41:08 +00:00
|
|
|
if (rc > -4096u) errno = -rc, rc = -1;
|
2022-07-18 14:23:15 +00:00
|
|
|
}
|
|
|
|
} else {
|
2022-08-10 19:56:45 +00:00
|
|
|
e = errno;
|
2022-07-20 04:18:33 +00:00
|
|
|
rc = sys_pledge(promises, execpromises);
|
2022-08-10 19:56:45 +00:00
|
|
|
if (rc && errno == ENOSYS) {
|
|
|
|
errno = e;
|
|
|
|
rc = 0;
|
|
|
|
}
|
2022-07-18 14:23:15 +00:00
|
|
|
}
|
2022-08-08 18:41:08 +00:00
|
|
|
if (!rc && !__vforked &&
|
|
|
|
(IsOpenbsd() || (IsLinux() && getpid() == gettid()))) {
|
2022-07-18 14:23:15 +00:00
|
|
|
__promises = ipromises;
|
|
|
|
__execpromises = iexecpromises;
|
2022-07-16 01:07:34 +00:00
|
|
|
}
|
2022-08-08 18:41:08 +00:00
|
|
|
} else {
|
|
|
|
rc = einval();
|
2022-04-28 16:42:36 +00:00
|
|
|
}
|
|
|
|
STRACE("pledge(%#s, %#s) → %d% m", promises, execpromises, rc);
|
|
|
|
return rc;
|
|
|
|
}
|