diff --git a/libc/calls/landlock_add_rule.c b/libc/calls/landlock_add_rule.c new file mode 100644 index 000000000..6d21975f6 --- /dev/null +++ b/libc/calls/landlock_add_rule.c @@ -0,0 +1,48 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/landlock.h" +#include "libc/calls/strace.internal.h" + +int sys_landlock_add_rule(int, enum landlock_rule_type, const void *, uint32_t); + +/** + * Adds new rule to Landlock ruleset. + * + * @error ENOSYS if Landlock isn't supported + * @error EPERM if Landlock supported but SECCOMP BPF shut it down + * @error EOPNOTSUPP if Landlock supported but disabled at boot time + * @error EINVAL if flags not 0, or inconsistent access in the rule, + * i.e. 
landlock_path_beneath_attr::allowed_access is not a subset + * of the ruleset handled accesses + * @error ENOMSG empty allowed_access + * @error EBADF `fd` is not a file descriptor for current thread, or + * member of `rule_attr` is not a file descriptor as expected + * @error EBADFD `fd` is not a ruleset file descriptor, or a member + * of `rule_attr` is not the expected file descriptor type + * @error EPERM `fd` has no write access to the underlying ruleset + * @error EFAULT `rule_attr` inconsistency + */ +int landlock_add_rule(int fd, enum landlock_rule_type rule_type, + const void *rule_attr, uint32_t flags) { + int rc; + rc = sys_landlock_add_rule(fd, rule_type, rule_attr, flags); + KERNTRACE("landlock_add_rule(%d, %d, %p, %#x) → %d% m", fd, rule_type, + rule_attr, flags, rc); + return rc; +} diff --git a/libc/calls/landlock_create_ruleset.c b/libc/calls/landlock_create_ruleset.c new file mode 100644 index 000000000..d57357ea5 --- /dev/null +++ b/libc/calls/landlock_create_ruleset.c @@ -0,0 +1,48 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/landlock.h" +#include "libc/calls/strace.internal.h" + +int sys_landlock_create_ruleset(const struct landlock_ruleset_attr *, size_t, + uint32_t); + +/** + * Create new Landlock filesystem sandboxing ruleset. + * + * You may also use this function to query the current ABI version: + * + * landlock_create_ruleset(0, 0, LANDLOCK_CREATE_RULESET_VERSION); + * + * @return close exec file descriptor for new ruleset + * @error ENOSYS if not running Linux 5.13+ + * @error EPERM if pledge() or seccomp bpf shut it down + * @error EOPNOTSUPP Landlock supported but disabled at boot + * @error EINVAL unknown flags, or unknown access, or too small size + * @error E2BIG attr or size inconsistencies + * @error EFAULT attr or size inconsistencies + * @error ENOMSG empty landlock_ruleset_attr::handled_access_fs + */ +int landlock_create_ruleset(const struct landlock_ruleset_attr *attr, + size_t size, uint32_t flags) { + int rc; + rc = sys_landlock_create_ruleset(attr, size, flags); + KERNTRACE("landlock_create_ruleset(%p, %'zu, %#x) → %d% m", attr, size, flags, + rc); + return rc; +} diff --git a/libc/calls/landlock_restrict_self.c b/libc/calls/landlock_restrict_self.c new file mode 100644 index 000000000..96c2ad3b7 --- /dev/null +++ b/libc/calls/landlock_restrict_self.c @@ -0,0 +1,42 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Justine Alexandra Roberts 
Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/calls/landlock.h" +#include "libc/calls/strace.internal.h" + +int sys_landlock_restrict_self(int, uint32_t); + +/** + * Enforces Landlock ruleset on calling thread. 
+ * + * @error EOPNOTSUPP if Landlock supported but disabled at boot time + * @error EINVAL if flags isn't zero + * @error EBADF if `fd` isn't file descriptor for the current thread + * @error EBADFD if `fd` is not a ruleset file descriptor + * @error EPERM if `fd` has no read access to underlying ruleset, or + * current thread is not running with no_new_privs, or it doesn’t + * have CAP_SYS_ADMIN in its namespace + * @error E2BIG if the maximum number of stacked rulesets is + * reached for current thread + */ +int landlock_restrict_self(int fd, uint32_t flags) { + int rc; + rc = sys_landlock_restrict_self(fd, flags); + KERNTRACE("landlock_restrict_self(%d, %#x) → %d% m", fd, flags, rc); + return rc; +} diff --git a/libc/calls/termios.h b/libc/calls/termios.h index 9f88f7786..ccb73e11a 100644 --- a/libc/calls/termios.h +++ b/libc/calls/termios.h @@ -20,6 +20,7 @@ int openpty(int *, int *, char *, const struct termios *, const struct winsize *) paramsnonnull((1, 2)) dontdiscard; int forkpty(int *, char *, const struct termios *, const struct winsize *) paramsnonnull((1, 2)) dontdiscard; +char *ptsname(int); errno_t ptsname_r(int, char *, size_t); int grantpt(int); diff --git a/libc/fmt/joinpaths.c b/libc/fmt/joinpaths.c new file mode 100644 index 000000000..2643fe120 --- /dev/null +++ b/libc/fmt/joinpaths.c @@ -0,0 +1,68 @@ +/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. 
│ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. │ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/bits/bits.h" +#include "libc/str/path.h" +#include "libc/str/str.h" +#include "libc/sysv/errfuns.h" + +/** + * Joins paths, e.g. + * + * "a" + "b" → "a/b" + * "a/" + "b" → "a/b" + * "a" + "b/" → "a/b/" + * "a" + "/b" → "/b" + * "." + "b" → "b" + * "" + "b" → "b" + * + * @return joined path, which may be `buf`, `path`, or `other`, or null + * if (1) `buf` didn't have enough space, or (2) both `path` and + * `other` were null + */ +char *_joinpaths(char *buf, size_t size, const char *path, const char *other) { + size_t pathlen, otherlen; + if (!other) return (/*unconst*/ char *)path; + if (!path) return (/*unconst*/ char *)other; + otherlen = strlen(other); + if (!otherlen) { + return (/*unconst*/ char *)path; + } + pathlen = strlen(path); + if (!pathlen || (READ16LE(path) == READ16LE(".")) || *other == '/') { + return (/*unconst*/ char *)other; + } + if (path[pathlen - 1] == '/') { + if (pathlen + otherlen + 1 <= size) { + memmove(buf, path, pathlen); + memmove(buf + pathlen, other, otherlen + 1); + return buf; + } else { + return 0; + } + } else { + if (pathlen + 1 + otherlen + 1 <= size) { + memmove(buf, path, pathlen); + buf[pathlen] = '/'; + memmove(buf + pathlen + 1, other, otherlen + 1); + return buf; + } else { + return 0; + } + } +} diff --git a/libc/intrin/kerrnodocs.S b/libc/intrin/kerrnodocs.S index d03750d07..5d0a1b25c 100644 --- a/libc/intrin/kerrnodocs.S +++ 
b/libc/intrin/kerrnodocs.S @@ -116,6 +116,7 @@ kErrnoDocs: .e ENOTRECOVERABLE,"State not recoverable" .e ENONET,"Machine is not on the network" .e ERESTART,"Interrupted system call should be restarted" + .e EBADFD,"File descriptor in bad state" .long MAGNUM_TERMINATOR .endobj kErrnoDocs,globl,hidden .overrun diff --git a/libc/intrin/kerrnonames.S b/libc/intrin/kerrnonames.S index 85394e0e2..bbbce5c13 100644 --- a/libc/intrin/kerrnonames.S +++ b/libc/intrin/kerrnonames.S @@ -117,6 +117,7 @@ kErrnoNames: .e ENONET .e ERESTART .e ENODATA + .e EBADFD .long MAGNUM_TERMINATOR .endobj kErrnoNames,globl,hidden .overrun diff --git a/libc/intrin/promises.internal.h b/libc/intrin/promises.internal.h index d421c8bba..ccb0584f2 100644 --- a/libc/intrin/promises.internal.h +++ b/libc/intrin/promises.internal.h @@ -1,25 +1,26 @@ #ifndef COSMOPOLITAN_LIBC_INTRIN_PROMISES_H_ #define COSMOPOLITAN_LIBC_INTRIN_PROMISES_H_ -#define PROMISE_STDIO 0 -#define PROMISE_RPATH 1 -#define PROMISE_WPATH 2 -#define PROMISE_CPATH 3 -#define PROMISE_DPATH 4 -#define PROMISE_FLOCK 5 -#define PROMISE_FATTR 6 -#define PROMISE_INET 7 -#define PROMISE_UNIX 8 -#define PROMISE_DNS 9 -#define PROMISE_TTY 10 -#define PROMISE_RECVFD 11 -#define PROMISE_PROC 12 -#define PROMISE_THREAD 13 -#define PROMISE_EXEC 14 -#define PROMISE_EXECNATIVE 15 -#define PROMISE_ID 16 -#define PROMISE_UNVEIL 17 -#define PROMISE_SENDFD 18 +#define PROMISE_STDIO 0 +#define PROMISE_RPATH 1 +#define PROMISE_WPATH 2 +#define PROMISE_CPATH 3 +#define PROMISE_DPATH 4 +#define PROMISE_FLOCK 5 +#define PROMISE_FATTR 6 +#define PROMISE_INET 7 +#define PROMISE_UNIX 8 +#define PROMISE_DNS 9 +#define PROMISE_TTY 10 +#define PROMISE_RECVFD 11 +#define PROMISE_PROC 12 +#define PROMISE_EXEC 13 +#define PROMISE_ID 14 +#define PROMISE_UNVEIL 15 +#define PROMISE_SENDFD 16 +#define PROMISE_SETTIME 17 +#define PROMISE_PROT_EXEC 18 +#define PROMISE_VMINFO 19 #define PLEDGED(x) ((~__promises >> PROMISE_##x) & 1) diff --git a/libc/mem/pledge.c 
b/libc/mem/pledge.c index 3370982c7..32f03ad6f 100644 --- a/libc/mem/pledge.c +++ b/libc/mem/pledge.c @@ -25,6 +25,7 @@ #include "libc/calls/syscall-sysv.internal.h" #include "libc/calls/syscall_support-sysv.internal.h" #include "libc/dce.h" +#include "libc/intrin/kprintf.h" #include "libc/intrin/promises.internal.h" #include "libc/limits.h" #include "libc/macros.internal.h" @@ -34,6 +35,7 @@ #include "libc/str/str.h" #include "libc/sysv/consts/af.h" #include "libc/sysv/consts/audit.h" +#include "libc/sysv/consts/clone.h" #include "libc/sysv/consts/f.h" #include "libc/sysv/consts/map.h" #include "libc/sysv/consts/nrlinux.h" @@ -49,6 +51,8 @@ #define ADDRLESS 0x2000 #define LOCK 0x8000 #define TTY 0x8000 +#define NOEXEC 0x8000 +#define THREAD 0x8000 // TODO(jart): fix chibicc #ifdef __chibicc__ @@ -68,8 +72,15 @@ static const uint16_t kPledgeLinuxDefault[] = { __NR_linux_exit, // }; +// the stdio contains all the benign system calls. openbsd makes the +// assumption that preexisting file descriptors are trustworthy. we +// implement checking for these as a simple linear scan rather than +// binary search, since there doesn't appear to be any measurable +// difference in the latency of sched_yield() if it's at the start of +// the bpf script or the end. 
static const uint16_t kPledgeLinuxStdio[] = { __NR_linux_exit_group, // + __NR_linux_sched_yield, // __NR_linux_clock_getres, // __NR_linux_clock_gettime, // __NR_linux_clock_nanosleep, // @@ -91,6 +102,7 @@ static const uint16_t kPledgeLinuxStdio[] = { __NR_linux_fcntl, // __NR_linux_fstat, // __NR_linux_fsync, // + __NR_linux_sysinfo, // __NR_linux_fdatasync, // __NR_linux_ftruncate, // __NR_linux_getdents, // @@ -114,22 +126,24 @@ static const uint16_t kPledgeLinuxStdio[] = { __NR_linux_timerfd_create, // __NR_linux_timerfd_settime, // __NR_linux_timerfd_gettime, // - __NR_linux_gettimeofday, // __NR_linux_copy_file_range, // + __NR_linux_gettimeofday, // __NR_linux_sendfile, // __NR_linux_vmsplice, // __NR_linux_splice, // __NR_linux_lseek, // __NR_linux_tee, // __NR_linux_brk, // - __NR_linux_mmap, // __NR_linux_msync, // + __NR_linux_mmap | NOEXEC, // __NR_linux_munmap, // __NR_linux_mincore, // __NR_linux_madvise, // __NR_linux_fadvise, // - __NR_linux_mprotect, // + __NR_linux_mprotect | NOEXEC, // __NR_linux_arch_prctl, // + __NR_linux_migrate_pages, // + __NR_linux_sync_file_range, // __NR_linux_set_tid_address, // __NR_linux_nanosleep, // __NR_linux_pipe, // @@ -150,17 +164,27 @@ static const uint16_t kPledgeLinuxStdio[] = { __NR_linux_alarm, // __NR_linux_pause, // __NR_linux_shutdown, // + __NR_linux_eventfd, // + __NR_linux_eventfd2, // + __NR_linux_signalfd, // + __NR_linux_signalfd4, // __NR_linux_sigaction, // __NR_linux_sigaltstack, // __NR_linux_sigprocmask, // __NR_linux_sigsuspend, // __NR_linux_sigreturn, // + __NR_linux_sigpending, // __NR_linux_socketpair, // + __NR_linux_getrusage, // + __NR_linux_times, // __NR_linux_umask, // __NR_linux_wait4, // __NR_linux_uname, // __NR_linux_prctl, // - __NR_linux_sched_yield, // + __NR_linux_clone | THREAD, // + __NR_linux_futex, // + __NR_linux_set_robust_list, // + __NR_linux_get_robust_list, // }; static const uint16_t kPledgeLinuxFlock[] = { @@ -265,6 +289,7 @@ static const uint16_t 
kPledgeLinuxUnix[] = { static const uint16_t kPledgeLinuxDns[] = { __NR_linux_socket | INET, // + __NR_linux_bind, // __NR_linux_sendto, // __NR_linux_connect, // __NR_linux_recvfrom, // @@ -275,11 +300,13 @@ static const uint16_t kPledgeLinuxTty[] = { }; static const uint16_t kPledgeLinuxRecvfd[] = { - __NR_linux_recvmsg, // + __NR_linux_recvmsg, // + __NR_linux_recvmmsg, // }; static const uint16_t kPledgeLinuxSendfd[] = { - __NR_linux_sendmsg, // + __NR_linux_sendmsg, // + __NR_linux_sendmmsg, // }; static const uint16_t kPledgeLinuxProc[] = { @@ -293,14 +320,9 @@ static const uint16_t kPledgeLinuxProc[] = { __NR_linux_setrlimit, // __NR_linux_getpriority, // __NR_linux_setpriority, // -}; - -static const uint16_t kPledgeLinuxThread[] = { - __NR_linux_clone, // - __NR_linux_futex, // - __NR_linux_tgkill, // - __NR_linux_set_robust_list, // - __NR_linux_get_robust_list, // + __NR_linux_ioprio_get, // + __NR_linux_ioprio_set, // + __NR_linux_tgkill, // }; static const uint16_t kPledgeLinuxId[] = { @@ -319,13 +341,23 @@ static const uint16_t kPledgeLinuxId[] = { __NR_linux_setfsgid, // }; +static const uint16_t kPledgeLinuxSettime[] = { + __NR_linux_settimeofday, // + __NR_linux_clock_adjtime, // +}; + +static const uint16_t kPledgeLinuxProtExec[] = { + __NR_linux_mmap, // + __NR_linux_mprotect, // +}; + static const uint16_t kPledgeLinuxExec[] = { __NR_linux_execve, // __NR_linux_execveat, // - __NR_linux_access, // - __NR_linux_faccessat, // - __NR_linux_open | READONLY, // - __NR_linux_openat | READONLY, // + __NR_linux_access, // for ape loader + __NR_linux_faccessat, // for ape binaries + __NR_linux_open | READONLY, // for ape loader + __NR_linux_openat | READONLY, // for ape binaries }; static const uint16_t kPledgeLinuxExec2[] = { @@ -339,30 +371,37 @@ static const uint16_t kPledgeLinuxUnveil[] = { __NR_linux_landlock_restrict_self, // }; +// placeholder group +// pledge.com checks this to do auto-unveiling +static const uint16_t kPledgeLinuxVminfo[] = { + 
__NR_linux_openat | READONLY, // +}; + static const struct Pledges { const char *name; const uint16_t *syscalls; const size_t len; } kPledgeLinux[] = { - [PROMISE_STDIO] = {"stdio", PLEDGE(kPledgeLinuxStdio)}, // - [PROMISE_RPATH] = {"rpath", PLEDGE(kPledgeLinuxRpath)}, // - [PROMISE_WPATH] = {"wpath", PLEDGE(kPledgeLinuxWpath)}, // - [PROMISE_CPATH] = {"cpath", PLEDGE(kPledgeLinuxCpath)}, // - [PROMISE_DPATH] = {"dpath", PLEDGE(kPledgeLinuxDpath)}, // - [PROMISE_FLOCK] = {"flock", PLEDGE(kPledgeLinuxFlock)}, // - [PROMISE_FATTR] = {"fattr", PLEDGE(kPledgeLinuxFattr)}, // - [PROMISE_INET] = {"inet", PLEDGE(kPledgeLinuxInet)}, // - [PROMISE_UNIX] = {"unix", PLEDGE(kPledgeLinuxUnix)}, // - [PROMISE_DNS] = {"dns", PLEDGE(kPledgeLinuxDns)}, // - [PROMISE_TTY] = {"tty", PLEDGE(kPledgeLinuxTty)}, // - [PROMISE_RECVFD] = {"recvfd", PLEDGE(kPledgeLinuxRecvfd)}, // - [PROMISE_SENDFD] = {"sendfd", PLEDGE(kPledgeLinuxSendfd)}, // - [PROMISE_PROC] = {"proc", PLEDGE(kPledgeLinuxProc)}, // - [PROMISE_THREAD] = {"thread", PLEDGE(kPledgeLinuxThread)}, // - [PROMISE_EXEC] = {"exec", PLEDGE(kPledgeLinuxExec)}, // - [PROMISE_EXECNATIVE] = {"execnative", PLEDGE(kPledgeLinuxExec2)}, // - [PROMISE_ID] = {"id", PLEDGE(kPledgeLinuxId)}, // - [PROMISE_UNVEIL] = {"unveil", PLEDGE(kPledgeLinuxUnveil)}, // + [PROMISE_STDIO] = {"stdio", PLEDGE(kPledgeLinuxStdio)}, // + [PROMISE_RPATH] = {"rpath", PLEDGE(kPledgeLinuxRpath)}, // + [PROMISE_WPATH] = {"wpath", PLEDGE(kPledgeLinuxWpath)}, // + [PROMISE_CPATH] = {"cpath", PLEDGE(kPledgeLinuxCpath)}, // + [PROMISE_DPATH] = {"dpath", PLEDGE(kPledgeLinuxDpath)}, // + [PROMISE_FLOCK] = {"flock", PLEDGE(kPledgeLinuxFlock)}, // + [PROMISE_FATTR] = {"fattr", PLEDGE(kPledgeLinuxFattr)}, // + [PROMISE_INET] = {"inet", PLEDGE(kPledgeLinuxInet)}, // + [PROMISE_UNIX] = {"unix", PLEDGE(kPledgeLinuxUnix)}, // + [PROMISE_DNS] = {"dns", PLEDGE(kPledgeLinuxDns)}, // + [PROMISE_TTY] = {"tty", PLEDGE(kPledgeLinuxTty)}, // + [PROMISE_RECVFD] = {"recvfd", 
PLEDGE(kPledgeLinuxRecvfd)}, // + [PROMISE_SENDFD] = {"sendfd", PLEDGE(kPledgeLinuxSendfd)}, // + [PROMISE_PROC] = {"proc", PLEDGE(kPledgeLinuxProc)}, // + [PROMISE_EXEC] = {"exec", PLEDGE(kPledgeLinuxExec)}, // + [PROMISE_ID] = {"id", PLEDGE(kPledgeLinuxId)}, // + [PROMISE_UNVEIL] = {"unveil", PLEDGE(kPledgeLinuxUnveil)}, // + [PROMISE_SETTIME] = {"settime", PLEDGE(kPledgeLinuxSettime)}, // + [PROMISE_PROT_EXEC] = {"prot_exec", PLEDGE(kPledgeLinuxProtExec)}, // + [PROMISE_VMINFO] = {"vminfo", PLEDGE(kPledgeLinuxVminfo)}, // }; static const struct sock_filter kFilterStart[] = { @@ -373,6 +412,9 @@ static const struct sock_filter kFilterStart[] = { // each filter assumes ordinal is already loaded into accumulator BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), // Forbid some system calls with ENOSYS (rather than EPERM) + BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, __NR_linux_memfd_secret, 5, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_rseq, 4, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_memfd_create, 3, 0), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_openat2, 2, 0), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_clone3, 1, 0), BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_statx, 0, 1), @@ -397,7 +439,7 @@ static bool AppendFilter(struct Filter *f, struct sock_filter *p, size_t n) { // SYSCALL is only allowed in the .privileged section // We assume program image is loaded in 32-bit spaces -static bool AppendOriginVerification(struct Filter *f) { +static bool AppendOriginVerification(struct Filter *f, long ipromises) { intptr_t x = (intptr_t)__privileged_start; intptr_t y = (intptr_t)__privileged_end; assert(0 < x && x < y && y < INT_MAX); @@ -424,6 +466,55 @@ static bool AllowSyscall(struct Filter *f, uint16_t w) { return AppendFilter(f, PLEDGE(fragment)); } +// The first argument of sys_clone_linux() must NOT have: +// +// - CLONE_NEWNS (0x00020000) +// - CLONE_PTRACE (0x00002000) +// - CLONE_UNTRACED (0x00800000) +// +static bool AllowClone(struct Filter 
*f) { + static const struct sock_filter fragment[] = { + /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_clone, 0, 6 - 1), + /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), + /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x00822000), + /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), + /*L4*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + /*L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), + /*L6*/ /* next filter */ + }; + return AppendFilter(f, PLEDGE(fragment)); +} + +// The first argument of sys_clone_linux() must have: +// +// - CLONE_VM (0x00000100) +// - CLONE_FS (0x00000200) +// - CLONE_FILES (0x00000400) +// - CLONE_THREAD (0x00010000) +// - CLONE_SIGHAND (0x00000800) +// +// The first argument of sys_clone_linux() must NOT have: +// +// - CLONE_NEWNS (0x00020000) +// - CLONE_PTRACE (0x00002000) +// - CLONE_UNTRACED (0x00800000) +// +static bool AllowCloneThread(struct Filter *f) { + static const struct sock_filter fragment[] = { + /*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_clone, 0, 9 - 1), + /*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), + /*L2*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x00010f00), + /*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0x00010f00, 0, 8 - 4), + /*L4*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])), + /*L5*/ BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0x00822000), + /*L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1), + /*L7*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + /*L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), + /*L9*/ /* next filter */ + }; + return AppendFilter(f, PLEDGE(fragment)); +} + // The second argument of ioctl() must be one of: // // - FIONREAD (0x541b) @@ -488,33 +579,48 @@ static bool AllowIoctlTty(struct Filter *f) { // // The optname argument of setsockopt() must be one of: // -// - SO_TYPE ( 3) -// - SO_REUSEPORT (15) -// - SO_REUSEADDR ( 2) -// - SO_KEEPALIVE ( 9) -// - SO_RCVTIMEO (20) -// - SO_SNDTIMEO (21) -// - IP_RECVTTL (12) +// - TCP_NODELAY ( 1) +// - TCP_CORK ( 3) 
+// - TCP_KEEPIDLE ( 4) +// - TCP_KEEPINTVL ( 5) +// - SO_TYPE ( 3) +// - SO_ERROR ( 4) +// - SO_DONTROUTE ( 5) +// - SO_REUSEPORT (15) +// - SO_REUSEADDR ( 2) +// - SO_KEEPALIVE ( 9) +// - SO_RCVTIMEO (20) +// - SO_SNDTIMEO (21) +// - IP_RECVTTL (12) +// - IP_RECVERR (11) +// - TCP_FASTOPEN (23) +// - TCP_FASTOPEN_CONNECT (30) // static bool AllowSetsockopt(struct Filter *f) { static const int nr = __NR_linux_setsockopt; static const struct sock_filter fragment[] = { - /* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 15 - 1), + /* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, nr, 0, 21 - 1), /* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[1])), /* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 5 - 3, 0), /* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 5 - 4, 0), - /* L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 6, 0, 14 - 5), + /* L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 6, 0, 20 - 5), /* L5*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[2])), - /* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 3, 13 - 7, 0), - /* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 12, 13 - 8, 0), - /* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 15, 13 - 9, 0), - /* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 13 - 10, 0), - /*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 9, 13 - 11, 0), - /*L11*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 20, 13 - 12, 0), - /*L12*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 21, 0, 14 - 13), - /*L13*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), - /*L14*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), - /*L15*/ /* next filter */ + /* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 3, 19 - 7, 0), + /* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 12, 19 - 8, 0), + /* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 15, 19 - 9, 0), + /* L9*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 19 - 10, 0), + /*L10*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 9, 19 - 11, 0), + /*L11*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 20, 19 - 12, 0), + /*L12*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 1, 19 - 13, 0), + /*L13*/ BPF_JUMP(BPF_JMP | BPF_JEQ | 
BPF_K, 11, 19 - 14, 0), + /*L14*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 4, 19 - 15, 0), + /*L15*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 5, 19 - 16, 0), + /*L16*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 23, 19 - 17, 0), + /*L17*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 30, 19 - 18, 0), + /*L18*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 21, 0, 20 - 19), + /*L19*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + /*L20*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)), + /*L21*/ /* next filter */ }; return AppendFilter(f, PLEDGE(fragment)); } @@ -992,24 +1098,18 @@ static bool AllowFchmodat(struct Filter *f) { return AppendFilter(f, PLEDGE(fragment)); } -static bool AppendPledge(struct Filter *f, const uint16_t *p, size_t len, - bool needmapexec, bool needmorphing) { +static bool AppendPledge(struct Filter *f, const uint16_t *p, size_t len) { int i; for (i = 0; i < len; ++i) { switch (p[i]) { case __NR_linux_mmap: - if (needmapexec) { - if (!AllowMmap(f)) return false; - } else { - if (!AllowMmapNoexec(f)) return false; - } + if (!AllowMmap(f)) return false; break; - case __NR_linux_mprotect: - if (needmorphing) { - if (!AllowSyscall(f, __NR_linux_mprotect)) return false; - } else { - if (!AllowMprotectNoexec(f)) return false; - } + case __NR_linux_mmap | NOEXEC: + if (!AllowMmapNoexec(f)) return false; + break; + case __NR_linux_mprotect | NOEXEC: + if (!AllowMprotectNoexec(f)) return false; break; case __NR_linux_chmod: if (!AllowChmod(f)) return false; @@ -1071,6 +1171,12 @@ static bool AppendPledge(struct Filter *f, const uint16_t *p, size_t len, case __NR_linux_sendto | ADDRLESS: if (!AllowSendtoAddrless(f)) return false; break; + case __NR_linux_clone: + if (!AllowClone(f)) return false; + break; + case __NR_linux_clone | THREAD: + if (!AllowCloneThread(f)) return false; + break; default: assert(~p[i] & ~0xfff); if (!AllowSyscall(f, p[i])) return false; @@ -1083,23 +1189,16 @@ static bool AppendPledge(struct Filter *f, const uint16_t *p, size_t len, int sys_pledge_linux(unsigned long 
ipromises) { bool ok = true; int i, rc = -1; - bool needmapexec; - bool needmorphing; - bool needexecnative; struct Filter f = {0}; ipromises = ~ipromises; - needmapexec = (ipromises >> PROMISE_EXEC) & 1; - needmorphing = (ipromises >> PROMISE_THREAD) & 1; - needexecnative = (ipromises >> PROMISE_EXECNATIVE) & 1; if (AppendFilter(&f, kFilterStart, ARRAYLEN(kFilterStart)) && - (needmapexec || needexecnative || AppendOriginVerification(&f)) && - AppendPledge(&f, kPledgeLinuxDefault, ARRAYLEN(kPledgeLinuxDefault), - needmapexec, needmorphing)) { + ((ipromises & (1ul << PROMISE_EXEC)) || + AppendOriginVerification(&f, ipromises)) && + AppendPledge(&f, kPledgeLinuxDefault, ARRAYLEN(kPledgeLinuxDefault))) { for (i = 0; i < ARRAYLEN(kPledgeLinux); ++i) { if ((ipromises & (1ul << i)) && kPledgeLinux[i].name) { ipromises &= ~(1ul << i); - if (!AppendPledge(&f, kPledgeLinux[i].syscalls, kPledgeLinux[i].len, - needmapexec, needmorphing)) { + if (!AppendPledge(&f, kPledgeLinux[i].syscalls, kPledgeLinux[i].len)) { ok = false; rc = einval(); break; @@ -1131,7 +1230,7 @@ static int FindPromise(const char *name) { return -1; } -static int ParsePromises(const char *promises, unsigned long *out) { +int ParsePromises(const char *promises, unsigned long *out) { int rc = 0; int promise; unsigned long ipromises; @@ -1158,18 +1257,6 @@ static int ParsePromises(const char *promises, unsigned long *out) { return rc; } -static void FixupOpenbsdPromises(char *p) { - if (!p) return; - if ((p = strstr(p, "execnative"))) { - p[4] = ' '; - p[5] = ' '; - p[6] = ' '; - p[7] = ' '; - p[8] = ' '; - p[9] = ' '; - } -} - /** * Restricts system operations, e.g. 
* @@ -1223,30 +1310,31 @@ static void FixupOpenbsdPromises(char *p) { * * - "stdio" allows exit, close, dup, dup2, dup3, fchdir, fstat, fsync, * fdatasync, ftruncate, getdents, getegid, getrandom, geteuid, - * getgid, getgroups, getitimer, getpgid, getpgrp, getpid, getppid, - * getresgid, getresuid, getrlimit, getsid, wait4, gettimeofday, - * getuid, lseek, madvise, brk, arch_prctl, uname, set_tid_address, - * clock_getres, clock_gettime, clock_nanosleep, mmap (PROT_EXEC and - * weird flags aren't allowed), mprotect (PROT_EXEC isn't allowed), - * msync, munmap, nanosleep, pipe, pipe2, read, readv, pread, recv, - * poll, recvfrom, preadv, write, writev, pwrite, pwritev, select, - * pselect6, copy_file_range, sendfile, splice, vmsplice, alarm, - * pause, send, sendto (only if addr is null), setitimer, shutdown, - * sigaction (but SIGSYS is forbidden), sigaltstack, sigprocmask, - * sigreturn, sigsuspend, umask, mincore, socketpair, ioctl(FIONREAD), + * getgid, getgroups, times, getrusage, getitimer, getpgid, getpgrp, + * getpid, getppid, getresgid, getresuid, getrlimit, getsid, wait4, + * gettimeofday, getuid, lseek, madvise, brk, arch_prctl, uname, + * set_tid_address, clock_getres, clock_gettime, clock_nanosleep, mmap + * (PROT_EXEC and weird flags aren't allowed), mprotect (PROT_EXEC + * isn't allowed), msync, sync_file_range, migrate_pages, munmap, + * nanosleep, pipe, pipe2, read, readv, pread, recv, poll, recvfrom, + * preadv, write, writev, pwrite, pwritev, select, pselect6, + * copy_file_range, sendfile, tee, splice, vmsplice, alarm, pause, + * send, sendto (only if addr is null), setitimer, shutdown, sigaction + * (but SIGSYS is forbidden), sigaltstack, sigprocmask, sigreturn, + * sigsuspend, umask, mincore, socketpair, ioctl(FIONREAD), * ioctl(FIONBIO), ioctl(FIOCLEX), ioctl(FIONCLEX), fcntl(F_GETFD), * fcntl(F_SETFD), fcntl(F_GETFL), fcntl(F_SETFL), sched_yield, * epoll_create, epoll_create1, epoll_ctl, epoll_wait, epoll_pwait, - * epoll_pwait2. 
+ * epoll_pwait2, clone(CLONE_THREAD), futex, set_robust_list, + * get_robust_list, sigpending. * * - "rpath" (read-only path ops) allows chdir, getcwd, open(O_RDONLY), - * openat(O_RDONLY), stat, fstat, lstat, fstatat, access, - * faccessat,faccessat2, readlink, readlinkat, statfs, fstatfs. + * openat(O_RDONLY), stat, fstat, lstat, fstatat, access, faccessat, + * faccessat2, readlink, readlinkat, statfs, fstatfs. * * - "wpath" (write path ops) allows getcwd, open(O_WRONLY), - * openat(O_WRONLY), stat, fstat, lstat, fstatat, access, - * faccessat,faccessat2, readlink, readlinkat, chmod, fchmod, - * fchmodat. + * openat(O_WRONLY), stat, fstat, lstat, fstatat, access, faccessat, + * faccessat2, readlink, readlinkat, chmod, fchmod, fchmodat. * * - "cpath" (create path ops) allows open(O_CREAT), openat(O_CREAT), * rename, renameat, renameat2, link, linkat, symlink, symlinkat, @@ -1260,9 +1348,9 @@ static void FixupOpenbsdPromises(char *p) { * - "tty" allows ioctl(TIOCGWINSZ), ioctl(TCGETS), ioctl(TCSETS), * ioctl(TCSETSW), ioctl(TCSETSF). * - * - "recvfd" allows recvmsg in general (for SCM_RIGHTS). + * - "recvfd" allows recvmsg and recvmmsg. * - * - "recvfd" allows sendmsg in general (for SCM_RIGHTS). + * - "recvfd" allows sendmsg and sendmmsg. * * - "fattr" allows chmod, fchmod, fchmodat, utime, utimes, futimens, * utimensat. @@ -1275,32 +1363,35 @@ static void FixupOpenbsdPromises(char *p) { * * - "dns" allows socket(AF_INET), sendto, recvfrom, connect. * - * - "proc" allows fork, vfork, kill, getpriority, setpriority, prlimit, - * setrlimit, setpgid, setsid. - * - * - "thread" allows clone, futex, and permits PROT_EXEC in mprotect. + * - "proc" allows fork, vfork, clone, kill, tgkill, getpriority, + * setpriority, prlimit, setrlimit, setpgid, setsid. * * - "id" allows setuid, setreuid, setresuid, setgid, setregid, * setresgid, setgroups, prlimit, setrlimit, getpriority, setpriority, * setfsuid, setfsgid. * - * - "exec" allows execve, execveat, access, faccessat. 
On Linux this - * also weakens some security to permit running APE binaries. However - * on OpenBSD they must be assimilate beforehand. On Linux, mmap() - * will be loosened up to allow creating PROT_EXEC memory (for APE - * loader) and system call origin verification won't be activated. + * - "settime" allows settimeofday and clock_adjtime. * - * - "execnative" allows execve, execveat. Can only be used to run - * native executables; you won't be able to run APE binaries. mmap() - * and mprotect() are still prevented from creating executable memory. - * System call origin verification can't be enabled. If you always - * assimilate your APE binaries, then this should be preferred. On - * OpenBSD this will be rewritten to be "exec". + * - "exec" allows execve, execveat, access, openat(O_RDONLY). If the + * executable in question needs a loader, then you may need prot_exec + * too. With APE, security will be stronger if you assimilate your + * binaries beforehand, using the --assimilate flag, or the + * o//tool/build/assimilate.com program. + * + * - "prot_exec" allows mmap(PROT_EXEC) and mprotect(PROT_EXEC). This is + * needed to (1) code morph mutexes in __enable_threads(), and it's + * needed to (2) launch non-static or non-native executables, e.g. + * non-assimilated APE binaries, or dynamic-linked executables. * * - "unveil" allows unveil() to be called, as well as the underlying * landlock_create_ruleset, landlock_add_rule, landlock_restrict_self * calls on Linux. * + * - "vminfo" OpenBSD defines this for programs like `top`. On Linux, + * this is a placeholder group that lets tools like pledge.com check + * `__promises` and automatically unveil() a subset of files top would + * need, e.g. /proc/stat, /proc/meminfo. + * * `execpromises` only matters if "exec" or "execnative" are specified * in `promises`. In that case, this specifies the promises that'll * apply once execve() happens. 
If this is NULL then the default is @@ -1316,44 +1407,28 @@ static void FixupOpenbsdPromises(char *p) { * @return 0 on success, or -1 w/ errno * @raise ENOSYS if host os isn't Linux or OpenBSD * @raise EINVAL if `execpromises` on Linux isn't a subset of `promises` + * @raise EINVAL if `promises` allows exec and `execpromises` is null */ int pledge(const char *promises, const char *execpromises) { int rc; - char *p, *q; unsigned long ipromises, iexecpromises; if (!(rc = ParsePromises(promises, &ipromises)) && !(rc = ParsePromises(execpromises, &iexecpromises))) { if (IsLinux()) { // copy exec and execnative from promises to execpromises - iexecpromises = - ~(~iexecpromises | (~ipromises & ((1ul << PROMISE_EXEC) | // - (1ul << PROMISE_EXECNATIVE)))); + iexecpromises = ~(~iexecpromises | (~ipromises & (1ul << PROMISE_EXEC))); // if bits are missing in execpromises that exist in promises // then execpromises wouldn't be a monotonic access reduction // this check only matters when exec / execnative are allowed if ((ipromises & ~iexecpromises) && - (~ipromises & - ((1ul << PROMISE_EXEC) | (1ul << PROMISE_EXECNATIVE)))) { + (~ipromises & (1ul << PROMISE_EXEC))) { STRACE("execpromises must be a subset of promises"); rc = einval(); } else { rc = sys_pledge_linux(ipromises); } } else { - // openbsd only supports execnative and calls it exec - if ((p = strdup(promises))) { - FixupOpenbsdPromises(p); - if ((q = execpromises ? 
strdup(execpromises) : 0) || !execpromises) { - FixupOpenbsdPromises(q); - rc = sys_pledge(p, q); - free(q); - } else { - rc = -1; - } - free(p); - } else { - rc = -1; - } + rc = sys_pledge(promises, execpromises); } if (!rc) { __promises = ipromises; diff --git a/libc/mem/unveil.c b/libc/mem/unveil.c index 038f3ee3e..2dac9af6c 100644 --- a/libc/mem/unveil.c +++ b/libc/mem/unveil.c @@ -28,11 +28,13 @@ #include "libc/calls/syscall-sysv.internal.h" #include "libc/calls/syscall_support-sysv.internal.h" #include "libc/errno.h" +#include "libc/fmt/conv.h" #include "libc/intrin/kprintf.h" #include "libc/macros.internal.h" #include "libc/mem/mem.h" #include "libc/nexgen32e/threaded.h" #include "libc/runtime/internal.h" +#include "libc/str/path.h" #include "libc/str/str.h" #include "libc/sysv/consts/at.h" #include "libc/sysv/consts/audit.h" @@ -144,29 +146,109 @@ static int unveil_init(void) { static int sys_unveil_linux(const char *path, const char *permissions) { int rc; + if (!State.fd && (rc = unveil_init()) == -1) return rc; if ((path && !permissions) || (!path && permissions)) return einval(); if (!path && !permissions) return unveil_final(); struct landlock_path_beneath_attr pb = {0}; for (const char *c = permissions; *c != '\0'; c++) { switch (*c) { - // clang-format off - case 'r': pb.allowed_access |= UNVEIL_READ; break; - case 'w': pb.allowed_access |= UNVEIL_WRITE; break; - case 'x': pb.allowed_access |= UNVEIL_EXEC; break; - case 'c': pb.allowed_access |= UNVEIL_CREATE; break; - default: return einval(); - // clang-format on + case 'r': + pb.allowed_access |= UNVEIL_READ; + break; + case 'w': + pb.allowed_access |= UNVEIL_WRITE; + break; + case 'x': + pb.allowed_access |= UNVEIL_EXEC; + break; + case 'c': + pb.allowed_access |= UNVEIL_CREATE; + break; + default: + return einval(); } } pb.allowed_access &= State.fs_mask; - if ((rc = sys_open(path, O_PATH | O_CLOEXEC, 0)) == -1) return rc; + + // landlock exposes all metadata, so we only technically need to add 
+ // realpath(path) to the ruleset. however a corner case exists where + // it isn't valid, e.g. /dev/stdin -> /proc/2834/fd/pipe:[51032], so + // we'll need to work around this, by adding the path which is valid + const char *dir; + const char *last; + const char *next; + struct { + char lbuf[PATH_MAX]; + char buf1[PATH_MAX]; + char buf2[PATH_MAX]; + char buf3[PATH_MAX]; + char buf4[PATH_MAX]; + } * b; + if (strlen(path) + 1 > PATH_MAX) return enametoolong(); + if (!(b = malloc(sizeof(*b)))) return -1; + last = path; + next = path; + for (int i = 0;; ++i) { + if (i == 64) { + // give up + free(b); + return eloop(); + } + int err = errno; + if ((rc = sys_readlinkat(AT_FDCWD, next, b->lbuf, PATH_MAX)) != -1) { + if (rc < PATH_MAX) { + // we need to nul-terminate + b->lbuf[rc] = 0; + // last = next + strcpy(b->buf1, next); + last = b->buf1; + // next = join(dirname(next), link) + strcpy(b->buf2, next); + dir = dirname(b->buf2); + if ((next = _joinpaths(b->buf3, PATH_MAX, dir, b->lbuf))) { + // next now points to either: buf3, buf2, lbuf, rodata + strcpy(b->buf4, next); + next = b->buf4; + } else { + free(b); + return enametoolong(); + } + } else { + // symbolic link data was too long + free(b); + return enametoolong(); + } + } else if (errno == EINVAL) { + // next wasn't a symbolic link + errno = err; + path = next; + break; + } else if (i && (errno == ENOENT || errno == ENOTDIR)) { + // next is a broken symlink, use last + errno = err; + path = last; + break; + } else { + // readlink failed for some other reason + free(b); + return -1; + } + } + + // now we can open the path + rc = sys_open(path, O_PATH | O_NOFOLLOW | O_CLOEXEC, 0); + free(b); + if (rc == -1) return rc; + pb.parent_fd = rc; struct stat st; if ((rc = sys_fstat(pb.parent_fd, &st)) == -1) { return err_close(rc, pb.parent_fd); } - if (!S_ISDIR(st.st_mode)) pb.allowed_access &= FILE_BITS; + if (!S_ISDIR(st.st_mode)) { + pb.allowed_access &= FILE_BITS; + } if ((rc = landlock_add_rule(State.fd, 
LANDLOCK_RULE_PATH_BENEATH, &pb, 0))) { return err_close(rc, pb.parent_fd); } @@ -177,9 +259,9 @@ static int sys_unveil_linux(const char *path, const char *permissions) { /** * Restricts filesystem operations, e.g. * - * unveil(".", "r"); // current directory + children are visible - * unveil("/etc", "r"); // make /etc readable too - * unveil(0, 0); // commit and lock policy + * unveil(".", "r"); // current directory + children are visible + * unveil("/etc", "r"); // make /etc readable too + * unveil(0, 0); // commit and lock policy * * Unveiling restricts a thread's view of the filesystem to a set of * allowed paths with specific privileges. diff --git a/libc/runtime/morph.greg.c b/libc/runtime/morph.greg.c index 59ad7ff2a..06135ad19 100644 --- a/libc/runtime/morph.greg.c +++ b/libc/runtime/morph.greg.c @@ -29,6 +29,7 @@ #include "libc/nt/memory.h" #include "libc/nt/runtime.h" #include "libc/nt/thunk/msabi.h" +#include "libc/runtime/internal.h" #include "libc/runtime/runtime.h" #include "libc/sysv/consts/nr.h" #include "libc/sysv/consts/prot.h" @@ -41,13 +42,22 @@ static sigset_t oldss; static privileged void __morph_mprotect(void *addr, size_t size, int prot, int ntprot) { + bool cf; int ax, dx; uint32_t op; if (!IsWindows()) { - asm volatile("syscall" - : "=a"(ax), "=d"(dx) - : "0"(__NR_mprotect), "D"(addr), "S"(size), "1"(prot) + asm volatile(CFLAG_ASM("clc\n\t" + "syscall") + : CFLAG_CONSTRAINT(cf), "=a"(ax), "=d"(dx) + : "1"(__NR_mprotect), "D"(addr), "S"(size), "2"(prot) : "rcx", "r11", "memory"); +#ifndef NDEBUG + if (cf) ax = -ax; + if (ax == -EPERM) { + kprintf("error: need pledge(prot_exec) permission to code morph\n"); + _Exit(26); + } +#endif } else { __imp_VirtualProtect(addr, size, ntprot, &op); } diff --git a/libc/str/path.h b/libc/str/path.h index 74c65a91f..841c5d5d1 100644 --- a/libc/str/path.h +++ b/libc/str/path.h @@ -14,6 +14,7 @@ COSMOPOLITAN_C_START_ int _classifypath(const char *) libcesque nosideeffect; bool _isabspath(const char *) 
libcesque strlenesque; bool _isdirsep(int) libcesque pureconst; +char *_joinpaths(char *, size_t, const char *, const char *); COSMOPOLITAN_C_END_ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ diff --git a/libc/sysv/calls/landlock_add_rule.s b/libc/sysv/calls/landlock_add_rule.s deleted file mode 100644 index 07acd5ea4..000000000 --- a/libc/sysv/calls/landlock_add_rule.s +++ /dev/null @@ -1,2 +0,0 @@ -.include "o/libc/sysv/macros.internal.inc" -.scall landlock_add_rule,0xfffffffffffff1bd,globl diff --git a/libc/sysv/calls/landlock_create_ruleset.s b/libc/sysv/calls/landlock_create_ruleset.s deleted file mode 100644 index 7b070ec41..000000000 --- a/libc/sysv/calls/landlock_create_ruleset.s +++ /dev/null @@ -1,2 +0,0 @@ -.include "o/libc/sysv/macros.internal.inc" -.scall landlock_create_ruleset,0xfffffffffffff1bc,globl diff --git a/libc/sysv/calls/landlock_restrict_self.s b/libc/sysv/calls/landlock_restrict_self.s deleted file mode 100644 index 69fb78e58..000000000 --- a/libc/sysv/calls/landlock_restrict_self.s +++ /dev/null @@ -1,2 +0,0 @@ -.include "o/libc/sysv/macros.internal.inc" -.scall landlock_restrict_self,0xfffffffffffff1be,globl diff --git a/libc/sysv/calls/pselect.s b/libc/sysv/calls/pselect.s index eeb03d66a..a6db42be1 100644 --- a/libc/sysv/calls/pselect.s +++ b/libc/sysv/calls/pselect.s @@ -1,2 +1,2 @@ .include "o/libc/sysv/macros.internal.inc" -.scall pselect,0x1b406e20a218afff,globl +.scall pselect,0x1b406e20a218a10e,globl diff --git a/libc/sysv/calls/pselect6.s b/libc/sysv/calls/pselect6.s deleted file mode 100644 index 6daf1d0ae..000000000 --- a/libc/sysv/calls/pselect6.s +++ /dev/null @@ -1,2 +0,0 @@ -.include "o/libc/sysv/macros.internal.inc" -.scall pselect6,0xfffffffffffff10e,globl diff --git a/libc/sysv/calls/sys_landlock_add_rule.s b/libc/sysv/calls/sys_landlock_add_rule.s new file mode 100644 index 000000000..597f24ce7 --- /dev/null +++ b/libc/sysv/calls/sys_landlock_add_rule.s @@ -0,0 +1,2 @@ +.include "o/libc/sysv/macros.internal.inc" 
+.scall sys_landlock_add_rule,0xfffffffffffff1bd,globl,hidden diff --git a/libc/sysv/calls/sys_landlock_create_ruleset.s b/libc/sysv/calls/sys_landlock_create_ruleset.s new file mode 100644 index 000000000..6f950a7b5 --- /dev/null +++ b/libc/sysv/calls/sys_landlock_create_ruleset.s @@ -0,0 +1,2 @@ +.include "o/libc/sysv/macros.internal.inc" +.scall sys_landlock_create_ruleset,0xfffffffffffff1bc,globl,hidden diff --git a/libc/sysv/calls/sys_landlock_restrict_self.s b/libc/sysv/calls/sys_landlock_restrict_self.s new file mode 100644 index 000000000..fe414d2de --- /dev/null +++ b/libc/sysv/calls/sys_landlock_restrict_self.s @@ -0,0 +1,2 @@ +.include "o/libc/sysv/macros.internal.inc" +.scall sys_landlock_restrict_self,0xfffffffffffff1be,globl,hidden diff --git a/libc/sysv/consts.sh b/libc/sysv/consts.sh index 33e9a1ca3..ea7cc9013 100755 --- a/libc/sysv/consts.sh +++ b/libc/sysv/consts.sh @@ -132,6 +132,7 @@ syscon errno EMULTIHOP 72 95 90 0 94 0 # barely in posix syscon errno ENOLINK 67 97 91 0 95 0 # barely in posix syscon errno ENOMEDIUM 123 0 0 85 0 0 # not posix; not documented syscon errno EMEDIUMTYPE 124 0 0 86 0 0 # not posix; not documented +syscon errno EBADFD 77 9 0 0 0 0 # file descriptor in bad state syscon compat EWOULDBLOCK 11 35 35 35 35 10035 # same as EAGAIN on every platform we've seen # signals @@ -2583,7 +2584,6 @@ syscon junkerr ESRMNT 69 0 0 0 0 0 syscon junkerr ECOMM 70 0 0 0 0 0 syscon junkerr EDOTDOT 73 0 0 0 0 0 syscon junkerr ENOTUNIQ 76 0 0 0 0 0 -syscon junkerr EBADFD 77 9 0 0 0 0 syscon junkerr EREMCHG 78 0 0 0 0 0 syscon junkerr ELIBACC 79 0 0 0 0 0 syscon junkerr ELIBBAD 80 0 0 0 0 0 diff --git a/libc/sysv/consts/clone.h b/libc/sysv/consts/clone.h index bd8102970..f88c712ff 100644 --- a/libc/sysv/consts/clone.h +++ b/libc/sysv/consts/clone.h @@ -1,15 +1,7 @@ #ifndef COSMOPOLITAN_LIBC_SYSV_CONSTS_CLONE_H_ #define COSMOPOLITAN_LIBC_SYSV_CONSTS_CLONE_H_ -#include "libc/runtime/symbolic.h" -#if !(__ASSEMBLER__ + __LINKER__ + 0) 
-COSMOPOLITAN_C_START_ -extern const long CLONE_VM; - -COSMOPOLITAN_C_END_ -#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ - -#define CLONE_VM SYMBOLIC(CLONE_VM) +#define CLONE_VM 0x00000100 #define CLONE_FS 0x00000200 #define CLONE_FILES 0x00000400 #define CLONE_SIGHAND 0x00000800 diff --git a/libc/sysv/syscalls.sh b/libc/sysv/syscalls.sh index 680872a83..fa8d5eb46 100755 --- a/libc/sysv/syscalls.sh +++ b/libc/sysv/syscalls.sh @@ -332,7 +332,7 @@ scall timerfd_gettime 0xfffffffffffff11f globl # won't polyfill; see INTON/INTO scall recvmmsg 0x1dbffffffffff12b globl # ├─ end of life 2024-06-30 (extended) scall fanotify_init 0xfffffffffffff12c globl # ├─ last distro with the original gnome desktop scall fanotify_mark 0xfffffffffffff12d globl # └─ apple and google condemn the gplv3/gccrtev3 -scall prlimit 0xfffffffffffff12e globl +scall prlimit 0xfffffffffffff12e globl # a.k.a. prlimit64() scall name_to_handle_at 0xfffffffffffff12f globl scall open_by_handle_at 0xfffffffffffff130 globl scall clock_adjtime 0xfffffffffffff131 globl @@ -391,9 +391,9 @@ scall process_madvise 0xfffffffffffff1b8 globl scall epoll_pwait2 0xfffffffffffff1b9 globl scall mount_setattr 0xfffffffffffff1ba globl scall quotactl_fd 0xfffffffffffff1bb globl -scall landlock_create_ruleset 0xfffffffffffff1bc globl -scall landlock_add_rule 0xfffffffffffff1bd globl -scall landlock_restrict_self 0xfffffffffffff1be globl +scall sys_landlock_create_ruleset 0xfffffffffffff1bc globl hidden # Linux 5.13+ +scall sys_landlock_add_rule 0xfffffffffffff1bd globl hidden +scall sys_landlock_restrict_self 0xfffffffffffff1be globl hidden scall memfd_secret 0xfffffffffffff1bf globl scall process_mrelease 0xfffffffffffff1c0 globl scall futex_waitv 0xfffffffffffff1c1 globl diff --git a/test/libc/fmt/joinpaths_test.c b/test/libc/fmt/joinpaths_test.c new file mode 100644 index 000000000..5cc298d40 --- /dev/null +++ b/test/libc/fmt/joinpaths_test.c @@ -0,0 +1,38 @@ +/*-*- 
mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│ +│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Copyright 2022 Justine Alexandra Roberts Tunney │ +│ │ +│ Permission to use, copy, modify, and/or distribute this software for │ +│ any purpose with or without fee is hereby granted, provided that the │ +│ above copyright notice and this permission notice appear in all copies. │ +│ │ +│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │ +│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │ +│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │ +│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │ +│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │ +│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │ +│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ +│ PERFORMANCE OF THIS SOFTWARE. 
│ +╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/str/path.h" +#include "libc/testlib/testlib.h" + +TEST(xjoinpaths, test) { + char b[PATH_MAX]; + EXPECT_EQ(NULL, _joinpaths(b, sizeof(b), 0, 0)); + EXPECT_STREQ("x", _joinpaths(b, sizeof(b), "x", 0)); + EXPECT_STREQ("x", _joinpaths(b, sizeof(b), 0, "x")); + EXPECT_STREQ("", _joinpaths(b, sizeof(b), "", "")); + EXPECT_STREQ("b", _joinpaths(b, sizeof(b), "", "b")); + EXPECT_STREQ("a/b", _joinpaths(b, sizeof(b), "a", "b")); + EXPECT_STREQ("a/b", _joinpaths(b, sizeof(b), "a/", "b")); + EXPECT_STREQ("a/b/", _joinpaths(b, sizeof(b), "a", "b/")); + EXPECT_STREQ("/b", _joinpaths(b, sizeof(b), "a", "/b")); + EXPECT_STREQ("b", _joinpaths(b, sizeof(b), ".", "b")); + EXPECT_EQ(NULL, _joinpaths(b, 3, "a", "b/")); + EXPECT_EQ(NULL, _joinpaths(b, 4, "a", "b/")); + EXPECT_STREQ("a/b", _joinpaths(b, 4, "a/", "b")); + EXPECT_STREQ("a/b/", _joinpaths(b, 5, "a", "b/")); +} diff --git a/test/libc/mem/pledge_test.c b/test/libc/mem/pledge_test.c index 9e2a7f211..c18ec356e 100644 --- a/test/libc/mem/pledge_test.c +++ b/test/libc/mem/pledge_test.c @@ -32,9 +32,11 @@ #include "libc/macros.internal.h" #include "libc/mem/io.h" #include "libc/mem/mem.h" +#include "libc/runtime/internal.h" #include "libc/runtime/runtime.h" #include "libc/sock/sock.h" #include "libc/sock/struct/sockaddr.h" +#include "libc/stdio/stdio.h" #include "libc/sysv/consts/af.h" #include "libc/sysv/consts/at.h" #include "libc/sysv/consts/f.h" @@ -47,8 +49,10 @@ #include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/sig.h" #include "libc/sysv/consts/sock.h" +#include "libc/testlib/ezbench.h" #include "libc/testlib/testlib.h" #include "libc/thread/spawn.h" +#include "libc/time/time.h" STATIC_YOINK("zip_uri_support"); @@ -58,6 +62,8 @@ void OnSig(int sig) { // do nothing } +int memfd_secret(unsigned int); // our ENOSYS threshold + int extract(const char *from, const char *to, int mode) { int fdin, fdout; if ((fdin = 
open(from, O_RDONLY)) == -1) return -1; @@ -306,18 +312,17 @@ TEST(pledge, mmap) { EXPECT_TRUE(WIFEXITED(ws) && !WEXITSTATUS(ws)); } -TEST(pledge, mmapExec) { +TEST(pledge, mmapProtExec) { if (IsOpenbsd()) return; // b/c testing linux bpf char *p; int ws, pid; ASSERT_NE(-1, (pid = fork())); if (!pid) { - ASSERT_SYS(0, 0, pledge("stdio exec", "stdio")); + ASSERT_SYS(0, 0, pledge("stdio prot_exec", 0)); ASSERT_NE(MAP_FAILED, (p = mmap(0, FRAMESIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0))); ASSERT_SYS(0, 0, mprotect(p, FRAMESIZE, PROT_READ)); - ASSERT_SYS(EPERM, MAP_FAILED, - mprotect(p, FRAMESIZE, PROT_READ | PROT_EXEC)); + ASSERT_SYS(0, 0, mprotect(p, FRAMESIZE, PROT_READ | PROT_EXEC)); ASSERT_NE(MAP_FAILED, mmap(0, FRAMESIZE, PROT_EXEC | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0)); _Exit(0); @@ -440,7 +445,7 @@ TEST(pledge, execpromises_ok) { struct stat st; ASSERT_NE(-1, (pid = fork())); if (!pid) { - ASSERT_SYS(0, 0, pledge("stdio execnative", "stdio")); + ASSERT_SYS(0, 0, pledge("stdio exec", "stdio")); execl("life.elf", "life.elf", 0); _Exit(127); } @@ -455,7 +460,7 @@ TEST(pledge, execpromises_notok) { struct stat st; ASSERT_NE(-1, (pid = fork())); if (!pid) { - ASSERT_SYS(0, 0, pledge("stdio execnative", "stdio")); + ASSERT_SYS(0, 0, pledge("stdio exec", "stdio")); execl("sock.elf", "sock.elf", 0); _Exit(127); } @@ -470,7 +475,7 @@ TEST(pledge, execpromises_reducesAtExecOnLinux) { struct stat st; ASSERT_NE(-1, (pid = fork())); if (!pid) { - ASSERT_SYS(0, 0, pledge("stdio inet tty execnative", "stdio tty")); + ASSERT_SYS(0, 0, pledge("stdio inet tty exec", "stdio tty")); execl("sock.elf", "sock.elf", 0); _Exit(127); } @@ -485,7 +490,7 @@ TEST(pledge_openbsd, execpromisesIsNull_letsItDoAnything) { struct stat st; ASSERT_NE(-1, (pid = fork())); if (!pid) { - ASSERT_SYS(0, 0, pledge("stdio execnative", 0)); + ASSERT_SYS(0, 0, pledge("stdio exec", 0)); execl("sock.elf", "sock.elf", 0); _Exit(127); } @@ -500,7 +505,7 @@ TEST(pledge_openbsd, 
execpromisesIsSuperset_letsItDoAnything) { struct stat st; ASSERT_NE(-1, (pid = fork())); if (!pid) { - ASSERT_SYS(0, 0, pledge("stdio rpath execnative", "stdio rpath tty inet")); + ASSERT_SYS(0, 0, pledge("stdio rpath exec", "stdio rpath tty inet")); execl("sock.elf", "sock.elf", 0); _Exit(127); } @@ -511,7 +516,7 @@ TEST(pledge_openbsd, execpromisesIsSuperset_letsItDoAnything) { TEST(pledge_linux, execpromisesIsSuperset_notPossible) { if (IsOpenbsd()) return; - ASSERT_SYS(EINVAL, -1, pledge("stdio execnative", "stdio inet execnative")); + ASSERT_SYS(EINVAL, -1, pledge("stdio exec", "stdio inet exec")); } TEST(pledge_openbsd, execpromises_notok) { @@ -520,7 +525,7 @@ TEST(pledge_openbsd, execpromises_notok) { struct stat st; ASSERT_NE(-1, (pid = fork())); if (!pid) { - ASSERT_SYS(0, 0, pledge("stdio execnative", "stdio")); + ASSERT_SYS(0, 0, pledge("stdio exec", "stdio")); execl("sock.elf", "sock.elf", 0); _Exit(127); } @@ -528,3 +533,51 @@ TEST(pledge_openbsd, execpromises_notok) { EXPECT_TRUE(WIFSIGNALED(ws)); EXPECT_EQ(SIGABRT, WTERMSIG(ws)); } + +TEST(pledge_openbsd, bigSyscalls) { + if (IsOpenbsd()) return; // testing lunix + int ws, pid; + struct stat st; + ASSERT_NE(-1, (pid = fork())); + if (!pid) { + ASSERT_SYS(0, 0, pledge("stdio", 0)); + ASSERT_SYS(ENOSYS, -1, memfd_secret(0)); + ASSERT_SYS(ENOSYS, -1, sys_bogus()); + _Exit(0); + } + EXPECT_NE(-1, wait(&ws)); + EXPECT_TRUE(WIFEXITED(ws)); + EXPECT_EQ(0, WEXITSTATUS(ws)); +} + +int LockWorker(void *arg, int tid) { + flockfile(stdout); + ASSERT_EQ(gettid(), stdout->lock.lock); + funlockfile(stdout); + return 0; +} + +TEST(pledge, threadWithLocks_canCodeMorph) { + struct spawn worker; + int ws, pid; + // not sure how this works on OpenBSD but it works! 
+ if (!fork()) { + ASSERT_SYS(0, 0, pledge("stdio prot_exec", 0)); + ASSERT_SYS(0, 0, _spawn(LockWorker, 0, &worker)); + ASSERT_SYS(0, 0, _join(&worker)); + _Exit(0); + } + EXPECT_NE(-1, wait(&ws)); + EXPECT_TRUE(WIFEXITED(ws)); + EXPECT_EQ(0, WEXITSTATUS(ws)); +} + +BENCH(pledge, bench) { + int pid; + if (!fork()) { + ASSERT_SYS(0, 0, pledge("stdio", 0)); + EZBENCH2("sched_yield", donothing, sched_yield()); + _Exit(0); + } + wait(0); +} diff --git a/test/libc/mem/unveil_test.c b/test/libc/mem/unveil_test.c index c7e659739..2a9cebe5a 100644 --- a/test/libc/mem/unveil_test.c +++ b/test/libc/mem/unveil_test.c @@ -273,3 +273,23 @@ TEST(unveil, usedTwice_forbidden_worksWithPledge) { } EXPECT_SYS(0, 0, munmap(gotsome, FRAMESIZE)); } + +TEST(unveil, lotsOfPaths) { + int i, n; + SPAWN(); + n = 100; + for (i = 0; i < n; ++i) { + ASSERT_SYS(0, 0, touch(xasprintf("%d", i), 0644)); + ASSERT_SYS(0, 0, touch(xasprintf("%d-", i), 0644)); + } + for (i = 0; i < n; ++i) { + ASSERT_SYS(0, 0, unveil(xasprintf("%d", i), "rw")); + } + ASSERT_SYS(0, 0, unveil(0, 0)); + for (i = 0; i < n; ++i) { + ASSERT_SYS(0, 3, open(xasprintf("%d", i), O_RDONLY)); + ASSERT_SYS(0, 0, close(3)); + ASSERT_SYS(EACCES_OR_ENOENT, -1, open(xasprintf("%d-", i), O_RDONLY)); + } + EXITS(0); +} diff --git a/tool/build/pledge.c b/tool/build/pledge.c index c54676954..9e819a1ca 100644 --- a/tool/build/pledge.c +++ b/tool/build/pledge.c @@ -16,19 +16,26 @@ │ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │ │ PERFORMANCE OF THIS SOFTWARE. 
│ ╚─────────────────────────────────────────────────────────────────────────────*/ +#include "libc/bits/bits.h" #include "libc/calls/calls.h" +#include "libc/calls/landlock.h" #include "libc/calls/struct/rlimit.h" #include "libc/calls/struct/sched_param.h" +#include "libc/calls/struct/stat.h" #include "libc/calls/struct/sysinfo.h" #include "libc/calls/syscall-sysv.internal.h" #include "libc/dce.h" +#include "libc/elf/def.h" +#include "libc/elf/struct/ehdr.h" #include "libc/errno.h" #include "libc/fmt/conv.h" #include "libc/intrin/kprintf.h" +#include "libc/intrin/promises.internal.h" #include "libc/macros.internal.h" #include "libc/math.h" #include "libc/mem/mem.h" #include "libc/nexgen32e/kcpuids.h" +#include "libc/runtime/gc.internal.h" #include "libc/runtime/runtime.h" #include "libc/runtime/sysconf.h" #include "libc/sock/sock.h" @@ -37,12 +44,15 @@ #include "libc/stdio/strlist.internal.h" #include "libc/str/str.h" #include "libc/sysv/consts/ioprio.h" +#include "libc/sysv/consts/map.h" #include "libc/sysv/consts/o.h" #include "libc/sysv/consts/ok.h" #include "libc/sysv/consts/poll.h" #include "libc/sysv/consts/prio.h" +#include "libc/sysv/consts/prot.h" #include "libc/sysv/consts/rlimit.h" #include "libc/sysv/consts/sched.h" +#include "libc/sysv/errfuns.h" #include "libc/x/x.h" #include "third_party/getopt/getopt.h" @@ -55,7 +65,7 @@ usage: pledge.com [-hnN] PROG ARGS...\n\ -g GID call setgid()\n\ -u UID call setuid()\n\ -c PATH call chroot()\n\ - -v [PERM:]PATH call unveil(PATH,PERM) where PERM can have rwxc\n\ + -v [PERM:]PATH make PATH visible where PERM can have rwxc\n\ -n set maximum niceness\n\ -N don't normalize file descriptors\n\ -C SECS set cpu limit [default: inherited]\n\ @@ -75,11 +85,12 @@ usage: pledge.com [-hnN] PROG ARGS...\n\ - fattr: allow changing some struct stat bits\n\ - inet: allow IPv4 and IPv6\n\ - unix: allow local sockets\n\ - - dns: allow dns\n\ - - proc: allow fork, clone and friends\n\ - - thread: allow clone\n\ - id: allow setuid 
and friends\n\ - - exec: make execution more permissive\n\ + - dns: allow dns and related files\n\ + - proc: allow process and thread creation\n\ + - exec: implied by default\n\ + - prot_exec: allow creating executable memory\n\ + - vminfo: allows /proc/stat, /proc/self/maps, etc.\n\ \n\ pledge.com v1.1\n\ copyright 2022 justine alexandra roberts tunney\n\ @@ -94,6 +105,8 @@ the https://justine.lol/pledge/ page for online documentation.\n\ \n\ " +int ParsePromises(const char *, unsigned long *); + int g_gflag; int g_uflag; int g_hflag; @@ -115,8 +128,8 @@ static void GetOpts(int argc, char *argv[]) { int opt; struct sysinfo si; g_promises = 0; - g_proquota = GetCpuCount() * 2; g_fszquota = 256 * 1000 * 1000; + g_proquota = GetCpuCount() * 100; g_fszquota = 4 * 1000 * 1000 * 1000; g_memquota = 4L * 1024 * 1024 * 1024; if (!sysinfo(&si)) g_memquota = si.totalram; @@ -182,6 +195,13 @@ int GetBaseCpuFreqMhz(void) { return KCPUIDS(16H, EAX) & 0x7fff; } +static bool SupportsLandlock(void) { + int e = errno; + bool r = landlock_create_ruleset(0, 0, LANDLOCK_CREATE_RULESET_VERSION) >= 0; + errno = e; + return r; +} + int GetPollMaxFds(void) { int n; struct rlimit rl; @@ -283,6 +303,61 @@ void SetProLimit(long n) { } } +bool PathExists(const char *path) { + int err; + struct stat st; + if (path) { + err = errno; + if (!stat(path, &st)) { + return true; + } else { + errno = err; + return false; + } + } else { + return false; + } +} + +bool IsDynamicExecutable(const char *prog) { + int fd; + Elf64_Ehdr e; + struct stat st; + if ((fd = open(prog, O_RDONLY)) == -1) { + kprintf("open(%#s, O_RDONLY) failed: %m\n", prog); + exit(13); + } + if (read(fd, &e, sizeof(e)) != sizeof(e)) { + kprintf("%s: read(64) failed: %m\n", prog); + exit(16); + } + close(fd); + return e.e_type == ET_DYN && // + READ32LE(e.e_ident) == READ32LE(ELFMAG); +} + +void Unveil(const char *path, const char *perm) { + if (unveil(path, perm) == -1) { + kprintf("error: unveil(%#s, %#s) failed: %m\n", path, 
perm); + _Exit(20); + } +} + +void UnveilIfExists(const char *path, const char *perm) { + int err; + if (path) { + err = errno; + if (unveil(path, perm) == -1) { + if (errno == ENOENT) { + errno = err; + } else { + kprintf("error: unveil(%#s, %#s) failed: %m\n", path, perm); + _Exit(20); + } + } + } +} + void MakeProcessNice(void) { if (!g_nice) return; if (setpriority(PRIO_PROCESS, 0, 19) == -1) { @@ -301,12 +376,119 @@ void MakeProcessNice(void) { } } +void ApplyFilesystemPolicy(unsigned long ipromises) { + + if (!SupportsLandlock()) { + if (unveils.n) { + kprintf("error: the unveil() -v flag needs Linux 5.13+\n"); + _Exit(20); + } + } + + Unveil(prog, "rx"); + + if (IsDynamicExecutable(prog)) { + UnveilIfExists("/lib", "rx"); + UnveilIfExists("/lib64", "rx"); + UnveilIfExists("/usr/lib", "rx"); + UnveilIfExists("/usr/lib64", "rx"); + UnveilIfExists("/usr/local/lib", "rx"); + UnveilIfExists("/usr/local/lib64", "rx"); + UnveilIfExists("/etc/ld-musl-x86_64.path", "r"); + UnveilIfExists("/etc/ld.so.conf", "r"); + UnveilIfExists("/etc/ld.so.cache", "r"); + UnveilIfExists("/etc/ld.so.conf.d", "r"); + UnveilIfExists("/etc/ld.so.preload", "r"); + } + + if (~ipromises & (1ul << PROMISE_STDIO)) { + UnveilIfExists("/dev/fd", "r"); + UnveilIfExists("/dev/log", "w"); + UnveilIfExists("/dev/zero", "r"); + UnveilIfExists("/dev/null", "rw"); + UnveilIfExists("/dev/full", "rw"); + UnveilIfExists("/dev/stdin", "rw"); + UnveilIfExists("/dev/stdout", "rw"); + UnveilIfExists("/dev/stderr", "rw"); + UnveilIfExists("/dev/urandom", "r"); + UnveilIfExists("/dev/localtime", "r"); + UnveilIfExists("/proc/self/fd", "rw"); + UnveilIfExists("/proc/self/stat", "r"); + UnveilIfExists("/proc/self/status", "r"); + UnveilIfExists("/usr/share/locale", "r"); + UnveilIfExists("/proc/self/cmdline", "r"); + UnveilIfExists("/usr/share/zoneinfo", "r"); + UnveilIfExists("/proc/sys/kernel/version", "r"); + UnveilIfExists("/usr/share/common-licenses", "r"); + UnveilIfExists("/proc/sys/kernel/ngroups_max", 
"r"); + UnveilIfExists("/proc/sys/kernel/cap_last_cap", "r"); + UnveilIfExists("/proc/sys/vm/overcommit_memory", "r"); + } + + if (~ipromises & (1ul << PROMISE_INET)) { + UnveilIfExists("/etc/ssl/certs/ca-certificates.crt", "r"); + } + + if (~ipromises & (1ul << PROMISE_RPATH)) { + UnveilIfExists("/proc/filesystems", "r"); + } + + if (~ipromises & (1ul << PROMISE_DNS)) { + UnveilIfExists("/etc/hosts", "r"); + UnveilIfExists("/etc/hostname", "r"); + UnveilIfExists("/etc/services", "r"); + UnveilIfExists("/etc/protocols", "r"); + UnveilIfExists("/etc/resolv.conf", "r"); + } + + if (~ipromises & (1ul << PROMISE_TTY)) { + UnveilIfExists(ttyname(0), "rw"); // 1-up apparmor + UnveilIfExists("/etc/tty", "rw"); + UnveilIfExists("/etc/console", "rw"); + UnveilIfExists("/usr/share/terminfo", "r"); + } + + if (~ipromises & (1ul << PROMISE_PROT_EXEC)) { + UnveilIfExists("/usr/bin/ape", "rx"); + } + + if (~ipromises & (1ul << PROMISE_VMINFO)) { + UnveilIfExists("/proc/stat", "r"); + UnveilIfExists("/proc/meminfo", "r"); + UnveilIfExists("/proc/cpuinfo", "r"); + UnveilIfExists("/proc/diskstats", "r"); + UnveilIfExists("/proc/self/maps", "r"); + UnveilIfExists("/sys/devices/system/cpu", "r"); + } + + for (int i = 0; i < unveils.n; ++i) { + char *s, *t; + const char *path; + const char *perm; + s = unveils.p[i]; + if ((t = strchr(s, ':'))) { + *t = 0; + perm = s; + path = t + 1; + } else { + perm = "r"; + path = s; + } + Unveil(path, perm); + } + + if (unveil(0, 0) == -1) { + kprintf("error: unveil(0, 0) failed: %m\n"); + _Exit(20); + } +} + int main(int argc, char *argv[]) { - int i; bool hasfunbits; int useruid, usergid; int owneruid, ownergid; int oldfsuid, oldfsgid; + unsigned long ipromises; if (!IsLinux()) { kprintf("error: this program is only intended for linux\n"); @@ -365,13 +547,6 @@ int main(int argc, char *argv[]) { } // change root fs path - // all the documentation on the subject is unprofessional and crazy - // the linux devs willfully deprive linux users of 
security tools - // linux appears to not even forbid chroot on setuid binaries - // yes i've considered fchdir() and i don't really care - // ohh it's sooo insecure they say, and they solve it - // by imposing a requirement that we must only do - // the "insecure" thing as the root user lool if (g_chroot) { if (chdir(g_chroot) == -1) { kprintf("error: chdir(%#s) failed: %m\n", g_chroot); @@ -443,50 +618,26 @@ int main(int argc, char *argv[]) { } } - if (unveils.n) { - if (unveil(prog, "rx") == -1) { - kprintf("error: unveil(0, 0) failed: %m\n", prog, "rx"); - _Exit(20); - } - if (strstr(g_promises, "exec") && isexecutable("/usr/bin/ape")) { - if (unveil("/usr/bin/ape", "rx") == -1) { - kprintf("error: unveil(0, 0) failed: %m\n", "/usr/bin/ape", "rx"); - _Exit(20); - } - } - for (i = 0; i < unveils.n; ++i) { - char *s, *t; - const char *path; - const char *perm; - s = unveils.p[i]; - if ((t = strchr(s, ':'))) { - *t = 0; - perm = s; - path = t + 1; - } else { - perm = "r"; - path = s; - } - if (unveil(path, perm) == -1) { - kprintf("error: unveil(%#s, %#s) failed: %m\n", path, perm); - _Exit(20); - } - } - if (unveil(0, 0) == -1) { - kprintf("error: unveil(0, 0) failed: %m\n"); - _Exit(20); - } + if (ParsePromises(g_promises, &ipromises) == -1) { + kprintf("error: bad promises list: %s\n", g_promises); + _Exit(21); + } + + ApplyFilesystemPolicy(ipromises); + + // we always need exec which is a weakness of this model + if (!(~ipromises & (1ul << PROMISE_EXEC))) { + g_promises = xstrcat(g_promises, ' ', "exec"); } // apply sandbox - g_promises = xstrcat(g_promises, ' ', "execnative"); if (pledge(g_promises, g_promises) == -1) { kprintf("error: pledge(%#s) failed: %m\n", g_promises); _Exit(19); } // launch program - execve(prog, argv + optind, environ); + sys_execve(prog, argv + optind, environ); kprintf("%s: execve failed: %m\n", prog); return 127; } diff --git a/tool/net/help.txt b/tool/net/help.txt index 96219f1c3..a3de2c230 100644 --- a/tool/net/help.txt +++ 
b/tool/net/help.txt @@ -3668,17 +3668,17 @@ UNIX MODULE stdio - Allows read, write, send, recv, recvfrom, close, - clock_getres, clock_gettime, dup, dup2, dup3, fchdir, fstat, - fsync, fdatasync, ftruncate, getdents, getegid, getrandom, - geteuid, getgid, getgroups, getitimer, getpgid, getpgrp, getpid, - getppid, getresgid, getresuid, getrlimit, getsid, gettimeofday, - getuid, lseek, madvise, brk, mmap/mprotect (PROT_EXEC isn't - allowed), msync, munmap, gethostname, nanosleep, pipe, pipe2, - poll, setitimer, shutdown, sigaction, sigsuspend, sigprocmask, - socketpair, umask, wait4, ioctl(FIONREAD), ioctl(FIONBIO), - ioctl(FIOCLEX), ioctl(FIONCLEX), fcntl(F_GETFD), fcntl(F_SETFD), - fcntl(F_GETFL), fcntl(F_SETFL). + Allows read, write, send, recv, recvfrom, close, clock_getres, + clock_gettime, dup, fchdir, fstat, fsync, fdatasync, ftruncate, + getdents, getegid, getrandom, geteuid, getgid, getgroups, + getitimer, getpgid, getpgrp, getpid, getppid, getresgid, + getresuid, getrlimit, getsid, gettimeofday, getuid, lseek, + madvise, brk, mmap/mprotect (PROT_EXEC isn't allowed), msync, + munmap, gethostname, nanosleep, pipe, pipe2, poll, setitimer, + shutdown, sigaction, sigsuspend, sigprocmask, socketpair, umask, + wait4, getrusage, ioctl(FIONREAD), ioctl(FIONBIO), ioctl(FIOCLEX), + ioctl(FIONCLEX), fcntl(F_GETFD), fcntl(F_SETFD), fcntl(F_GETFL), + fcntl(F_SETFL). rpath @@ -3717,11 +3717,19 @@ UNIX MODULE dns - Allows sendto, recvfrom, socket (AF_INET), connect. + Allows sendto, recvfrom, socket(AF_INET), connect. + + recvfd + + Allows recvmsg, recvmmsg. + + sendfd + + Allows sendmsg, sendmmsg. proc - Allows fork, vfork, clone, kill, getpriority, setpriority, + Allows fork, vfork, clone, kill, tgkill, getpriority, setpriority, setrlimit, setpgid, setsid. id @@ -3729,26 +3737,30 @@ UNIX MODULE Allows setuid, setreuid, setresuid, setgid, setregid, setresgid, setgroups, setrlimit, getpriority, setpriority. + settime + + Allows settimeofday and clock_adjtime.
+ + unveil + + Allows unveil(). + exec - Allows execve, access. + Allows execve, access, faccessat, openat(O_RDONLY). - On Linux this also weakens some security to permit running APE - binaries. However on OpenBSD they must be assimilate beforehand. - On Linux, mmap() will be loosened up to allow creating PROT_EXEC - memory (for APE loader) and system call origin verification won't - be activated. + If the executable in question needs a loader, then you may need + "prot_exec" too. With APE, security will be stronger if you + assimilate your binaries beforehand, using the --assimilate flag, + or the o//tool/build/assimilate.com program. - execnative + prot_exec - Allows execve, execveat. + Allows mmap(PROT_EXEC) and mprotect(PROT_EXEC). - Can only be used to run native executables; you won't be able to - run APE binaries. mmap() and mprotect() are still prevented from - creating executable memory. System call origin verification can't - be enabled. If you always assimilate your APE binaries, then this - should be preferred. On OpenBSD this will be rewritten to be - "exec". + This may be needed to launch non-static non-native executables, + such as non-assimilated APE binaries, or programs that link + dynamic shared objects, i.e. most Linux distro binaries. `execpromises` only matters if "exec" or "execnative" are specified in `promises`. In that case, this specifies the promises that'll