Improve pledge() usability and consistency

- We now kill the program on violations like OpenBSD
- We now print a message explaining which promise is needed
- This change also fixes a linkage bug with thread local storage
- Your sigaction() handlers should now be more thread safe

A new `__pledge_mode` global has been introduced to make pledge() more
customizable on Linux. For example:

    __attribute__((__constructor__)) static void init(void) {
      __pledge_mode = SECCOMP_RET_ERRNO | EPERM;
    }

Can be used to restore our old permissive pledge() behavior.
This commit is contained in:
Justine Tunney 2022-08-07 16:18:33 -07:00
parent 13c1c45075
commit 5546559034
30 changed files with 713 additions and 86 deletions

View file

@ -19,17 +19,5 @@
#include "libc/calls/state.internal.h"
#include "libc/intrin/pthread.h"
unsigned __sighandrvas[NSIG];
unsigned __sighandflags[NSIG];
static pthread_mutex_t __sig_lock_obj;
void(__sig_lock)(void) {
__sig_lock_obj.attr = PTHREAD_MUTEX_RECURSIVE;
pthread_mutex_lock(&__sig_lock_obj);
}
void(__sig_unlock)(void) {
__sig_lock_obj.attr = PTHREAD_MUTEX_RECURSIVE;
pthread_mutex_unlock(&__sig_lock_obj);
}
_Thread_local unsigned __sighandrvas[NSIG];
_Thread_local unsigned __sighandflags[NSIG];

View file

@ -23,18 +23,27 @@
#include "libc/calls/struct/bpf.h"
#include "libc/calls/struct/filter.h"
#include "libc/calls/struct/seccomp.h"
#include "libc/calls/struct/sigaction.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/fmt/itoa.h"
#include "libc/intrin/kprintf.h"
#include "libc/intrin/promises.internal.h"
#include "libc/intrin/spinlock.h"
#include "libc/limits.h"
#include "libc/macros.internal.h"
#include "libc/nexgen32e/bsr.h"
#include "libc/nexgen32e/threaded.h"
#include "libc/runtime/runtime.h"
#include "libc/runtime/stack.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/audit.h"
#include "libc/sysv/consts/kern.h"
#include "libc/sysv/consts/nrlinux.h"
#include "libc/sysv/consts/o.h"
#include "libc/sysv/consts/pr.h"
#include "libc/sysv/consts/prot.h"
#include "libc/sysv/consts/sa.h"
#include "libc/sysv/consts/sig.h"
#include "libc/sysv/errfuns.h"
#define SPECIAL 0xf000
@ -63,6 +72,13 @@
#define PLEDGE(pledge) pledge, ARRAYLEN(pledge)
#define AbortPledge(reason) \
do { \
assert(!reason); \
asm("hlt"); \
unreachable; \
} while (0)
struct Filter {
size_t n;
struct sock_filter p[700];
@ -79,6 +95,7 @@ static const uint16_t kPledgeLinuxDefault[] = {
// difference in the latency of sched_yield() if it's at the start of
// the bpf script or the end.
static const uint16_t kPledgeLinuxStdio[] = {
__NR_linux_sigreturn, //
__NR_linux_exit_group, //
__NR_linux_sched_yield, //
__NR_linux_sched_getaffinity, //
@ -175,7 +192,6 @@ static const uint16_t kPledgeLinuxStdio[] = {
__NR_linux_sigaltstack, //
__NR_linux_sigprocmask, //
__NR_linux_sigsuspend, //
__NR_linux_sigreturn, //
__NR_linux_sigpending, //
__NR_linux_socketpair, //
__NR_linux_getrusage, //
@ -432,7 +448,7 @@ static const struct sock_filter kFilterStart[] = {
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
// each filter assumes ordinal is already loaded into accumulator
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
// Forbid some system calls with ENOSYS (rather than EPERM)
// forbid some system calls with ENOSYS (rather than EPERM)
BPF_JUMP(BPF_JMP | BPF_JGE | BPF_K, __NR_linux_memfd_secret, 5, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_rseq, 4, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_memfd_create, 3, 0),
@ -442,15 +458,91 @@ static const struct sock_filter kFilterStart[] = {
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (38 & SECCOMP_RET_DATA)),
};
static const struct sock_filter kFilterEnd[] = {
// if syscall isn't whitelisted then have it return -EPERM (-1)
static const struct sock_filter kFilterIgnoreExitGroup[] = {
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_exit_group, 0, 1),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (1 & SECCOMP_RET_DATA)),
};
static void Log(const char *s, ...) {
va_list va;
va_start(va, s);
do {
write(2, s, strlen(s));
} while ((s = va_arg(va, const char *)));
va_end(va);
}
static bool HasSyscall(struct Pledges *p, uint16_t n) {
int i;
for (i = 0; i < p->len; ++i) {
if ((p->syscalls[i] & 0x0fff) == n) {
return true;
}
}
return false;
}
static char *FixCpy(char p[17], uint64_t x, uint8_t k) {
while (k > 0) *p++ = "0123456789abcdef"[(x >> (k -= 4)) & 15];
*p = '\0';
return p;
}
static char *HexCpy(char p[17], uint64_t x) {
return FixCpy(p, x, ROUNDUP(x ? bsrl(x) + 1 : 1, 4));
}
static void OnSigSys(int sig, siginfo_t *si, ucontext_t *ctx) {
int i;
bool found;
char ord[17], rip[17];
struct sigaction dfl = {.sa_sigaction = SIG_DFL};
ctx->uc_mcontext.rax = -si->si_errno;
FixCpy(ord, si->si_syscall, 12);
HexCpy(rip, ctx->uc_mcontext.rip);
for (found = i = 0; i < ARRAYLEN(kPledgeLinux); ++i) {
if (HasSyscall(kPledgeLinux + i, si->si_syscall)) {
Log("error: has not pledged ", kPledgeLinux[i].name, //
" (ord=", ord, " rip=", rip, ")\n", 0);
found = true;
break;
}
}
if (!found) {
Log("error: unsupported syscall (ord=", ord, " rip=", rip, ")\n", 0);
}
switch (__pledge_mode) {
case SECCOMP_RET_KILL_PROCESS:
if (!sigaction(SIGABRT, &dfl, 0)) {
sys_kill(getpid(), SIGABRT, 1);
}
_Exit(128 + SIGABRT);
case SECCOMP_RET_KILL_THREAD:
if (!sigaction(SIGABRT, &dfl, 0)) {
sys_tgkill(getpid(), gettid(), SIGABRT);
}
_Exit1(128 + SIGABRT);
default:
break;
}
}
static void MonitorSigSys(void) {
static _Thread_local bool once;
if (once) return;
once = true;
struct sigaction sa = {
.sa_sigaction = OnSigSys,
.sa_flags = SA_SIGINFO | SA_RESTART,
};
if (sigaction(SIGSYS, &sa, 0)) {
AbortPledge("sigaction failed");
}
}
static void AppendFilter(struct Filter *f, struct sock_filter *p, size_t n) {
if (UNLIKELY(f->n + n > ARRAYLEN(f->p))) {
asm("hlt"); // need to increase array size
unreachable;
AbortPledge("need to increase array size");
}
memcpy(f->p + f->n, p, n * sizeof(*f->p));
f->n += n;
@ -1093,20 +1185,22 @@ static void AllowSocketUnix(struct Filter *f) {
// - PR_SET_SECCOMP (22)
// - PR_SET_NO_NEW_PRIVS (38)
// - PR_CAPBSET_READ (23)
// - PR_CAPBSET_DROP (24)
//
static void AllowPrctlStdio(struct Filter *f) {
static const struct sock_filter fragment[] = {
/*L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_prctl, 0, 10 - 1),
/*L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),
/*L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 15, 5, 0),
/*L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 16, 4, 0),
/*L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 21, 3, 0),
/*L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 2, 0),
/*L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 23, 1, 0),
/*L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 38, 0, 1),
/*L7*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L8*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L9*/ /* next filter */
/* L0*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_prctl, 0, 11 - 1),
/* L1*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(args[0])),
/* L2*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 15, 6, 0),
/* L3*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 16, 5, 0),
/* L4*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 21, 4, 0),
/* L5*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 22, 3, 0),
/* L6*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 23, 2, 0),
/* L7*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 24, 1, 0),
/* L8*/ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 38, 0, 1),
/* L9*/ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
/*L10*/ BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
/*L11*/ /* next filter */
};
AppendFilter(f, PLEDGE(fragment));
}
@ -1220,8 +1314,7 @@ static void AppendPledge(struct Filter *f, const uint16_t *p, size_t len) {
};
AppendFilter(f, PLEDGE(fragment));
} else {
asm("hlt"); // list of ordinals exceeds max displacement
unreachable;
AbortPledge("list of ordinals exceeds max displacement");
}
}
@ -1311,8 +1404,7 @@ static void AppendPledge(struct Filter *f, const uint16_t *p, size_t len) {
AllowPrlimitStdio(f);
break;
default:
asm("hlt"); // switch forgot to define a special ordinal
unreachable;
AbortPledge("switch forgot to define a special ordinal");
}
}
}
@ -1322,7 +1414,14 @@ int sys_pledge_linux(unsigned long ipromises) {
struct Filter f;
CheckLargeStackAllocation(&f, sizeof(f));
f.n = 0;
// set up the seccomp filter
AppendFilter(&f, PLEDGE(kFilterStart));
if (ipromises == -1) {
// if we're pledging empty string, then avoid triggering a sigsys
// when _Exit() gets called since we need to fallback to _Exit1()
AppendFilter(&f, PLEDGE(kFilterIgnoreExitGroup));
}
if (!(~ipromises & (1ul << PROMISE_EXEC))) {
AppendOriginVerification(&f);
}
@ -1332,7 +1431,30 @@ int sys_pledge_linux(unsigned long ipromises) {
AppendPledge(&f, kPledgeLinux[i].syscalls, kPledgeLinux[i].len);
}
}
AppendFilter(&f, PLEDGE(kFilterEnd));
// now determine the default seccomp action
// the __pledge_mode global could be set to
// - SECCOMP_RET_KILL
// - SECCOMP_RET_KILL_THREAD
// - SECCOMP_RET_KILL_PROCESS
// - SECCOMP_RET_ERRNO | EPERM
struct sock_filter filter[1] = {BPF_STMT(BPF_RET | BPF_K, 0)};
if (~ipromises & (1ul << PROMISE_EXEC)) {
// our sigsys error message handler can't be inherited across
// execve() boundaries so if you've pledged exec then that'll
// mean no error messages for you.
filter[0].k = __pledge_mode;
AppendFilter(&f, PLEDGE(filter));
} else {
// if we haven't pledged exec, then we can monitor SIGSYS
// and print a helpful error message when things do break
// the handler then decides what to do with __pledge_mode
MonitorSigSys();
filter[0].k = SECCOMP_RET_TRAP | EPERM;
AppendFilter(&f, PLEDGE(filter));
}
// register our seccomp filter with the kernel
if ((rc = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) != -1) {
struct sock_fprog sandbox = {.len = f.n, .filter = f.p};
rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &sandbox);
@ -1388,11 +1510,13 @@ int ParsePromises(const char *promises, unsigned long *out) {
*
* Pledging causes most system calls to become unavailable. Your system
* call policy is enforced by the kernel, which means it can propagate
* across execve() if permitted. This system call is supported on
* OpenBSD and Linux where it's polyfilled using SECCOMP BPF. The way it
* works on Linux is verboten system calls will raise EPERM whereas
* OpenBSD just kills the process while logging a helpful message to
* /var/log/messages explaining which promise category you needed.
* across execve() if permitted. Root is not required. This system call
* is supported on OpenBSD and Linux where it's polyfilled using SECCOMP
* BPF. The way it works on Linux is, if a forbidden system call is used
* then the kernel will will the process. On OpenBSD, a helpful message
* explaining which promise is needed should be emitted to your system
* log. On Linux, we log that to stderr with one exception: reporting is
* currently not possible if you pledge exec.
*
* Timing is everything with pledge. For example, if you're using
* threads, then you may want to enable them explicitly *before* calling
@ -1548,6 +1672,7 @@ int ParsePromises(const char *promises, unsigned long *out) {
* @raise ENOSYS if host os isn't Linux or OpenBSD
* @raise EINVAL if `execpromises` on Linux isn't a subset of `promises`
* @raise EINVAL if `promises` allows exec and `execpromises` is null
* @threadsafe
*/
int pledge(const char *promises, const char *execpromises) {
int rc;

View file

@ -99,7 +99,6 @@ static bool __sig_deliver(bool restartable, int sig, int si_code,
STRACE("delivering %G", sig);
// enter the signal
__sig_lock();
rva = __sighandrvas[sig];
flags = __sighandflags[sig];
if ((~flags & SA_NODEFER) || (flags & SA_RESETHAND)) {
@ -110,7 +109,6 @@ static bool __sig_deliver(bool restartable, int sig, int si_code,
// signal handler. in that case you must use SA_NODEFER.
__sighandrvas[sig] = (int32_t)(intptr_t)SIG_DFL;
}
__sig_unlock();
// setup the somewhat expensive information args
// only if they're requested by the user in sigaction()

View file

@ -37,7 +37,6 @@
#include "libc/dce.h"
#include "libc/intrin/asan.internal.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/spinlock.h"
#include "libc/limits.h"
#include "libc/log/backtrace.internal.h"
#include "libc/log/log.h"
@ -245,7 +244,7 @@ static int __sigaction(int sig, const struct sigaction *act,
}
/**
* Installs handler for kernel interrupt, e.g.:
* Installs handler for kernel interrupt to thread, e.g.:
*
* void GotCtrlC(int sig, siginfo_t *si, ucontext_t *ctx);
* struct sigaction sa = {.sa_sigaction = GotCtrlC,
@ -445,6 +444,7 @@ static int __sigaction(int sig, const struct sigaction *act,
* @return 0 on success or -1 w/ errno
* @see xsigaction() for a much better api
* @asyncsignalsafe
* @threadsafe
* @vforksafe
*/
int sigaction(int sig, const struct sigaction *act, struct sigaction *oldact) {
@ -452,9 +452,7 @@ int sigaction(int sig, const struct sigaction *act, struct sigaction *oldact) {
if (sig == SIGKILL || sig == SIGSTOP) {
rc = einval();
} else {
__sig_lock();
rc = __sigaction(sig, act, oldact);
__sig_unlock();
}
STRACE("sigaction(%G, %s, [%s]) → %d% m", sig, DescribeSigaction(0, act),
DescribeSigaction(rc, oldact), rc);

32
libc/calls/siglock.c Normal file
View file

@ -0,0 +1,32 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Justine Alexandra Roberts Tunney
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/calls/state.internal.h"
#include "libc/intrin/pthread.h"
static pthread_mutex_t __sig_lock_obj;
void(__sig_lock)(void) {
__sig_lock_obj.attr = PTHREAD_MUTEX_RECURSIVE;
pthread_mutex_lock(&__sig_lock_obj);
}
void(__sig_unlock)(void) {
__sig_lock_obj.attr = PTHREAD_MUTEX_RECURSIVE;
pthread_mutex_unlock(&__sig_lock_obj);
}

View file

@ -7,8 +7,8 @@ COSMOPOLITAN_C_START_
hidden extern int __vforked;
hidden extern bool __time_critical;
hidden extern unsigned __sighandrvas[NSIG];
hidden extern unsigned __sighandflags[NSIG];
hidden _Thread_local extern unsigned __sighandrvas[NSIG];
hidden _Thread_local extern unsigned __sighandflags[NSIG];
hidden extern const struct NtSecurityAttributes kNtIsInheritable;
void __fds_lock(void);

View file

@ -0,0 +1,26 @@
#ifndef COSMOPOLITAN_LIBC_CALLS_STRUCT_STATFS_H_
#define COSMOPOLITAN_LIBC_CALLS_STRUCT_STATFS_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_
struct statfs {
int64_t f_type; /* type of filesystem */
int64_t f_bsize; /* optimal transfer block size */
int64_t f_blocks; /* total data blocks in filesystem */
int64_t f_bfree; /* free blocks in filesystem */
int64_t f_bavail; /* free blocks available to */
int64_t f_files; /* total file nodes in filesystem */
int64_t f_ffree; /* free file nodes in filesystem */
int64_t f_fsid; /* filesystem id */
int64_t f_namelen; /* maximum length of filenames */
int64_t f_frsize; /* fragment size */
int64_t f_flags; /* mount flags of filesystem 2.6.36 */
int64_t f_spare[4];
};
int statfs(const char *, struct statfs *);
int fstatfs(int, struct statfs *);
COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_LIBC_CALLS_STRUCT_STATFS_H_ */

View file

@ -68,7 +68,6 @@ i32 sys_mincore(void *, u64, unsigned char *) hidden;
i32 sys_mkdirat(i32, const char *, u32) hidden;
i32 sys_mkfifo(const char *, u32) hidden;
i32 sys_mknod(const char *, u32, u64) hidden;
i32 sys_unmount(const char *, i32) hidden;
i32 sys_mprotect(void *, u64, i32) hidden;
i32 sys_msync(void *, u64, i32) hidden;
i32 sys_munmap(void *, u64) hidden;
@ -97,11 +96,13 @@ i32 sys_sigaltstack(const void *, void *) hidden;
i32 sys_symlinkat(const char *, i32, const char *) hidden;
i32 sys_sync(void) hidden;
i32 sys_sync_file_range(i32, i64, i64, u32) hidden;
i32 sys_syslog(i32, char *, i32) hidden;
i32 sys_tgkill(i32, i32, i32) hidden;
i32 sys_tkill(i32, i32, void *) hidden;
i32 sys_truncate(const char *, u64, u64) hidden;
i32 sys_uname(void *) hidden;
i32 sys_unlinkat(i32, const char *, i32) hidden;
i32 sys_unmount(const char *, i32) hidden;
i32 sys_unveil(const char *, const char *) hidden;
i64 sys_copy_file_range(i32, long *, i32, long *, u64, u32) hidden;
i64 sys_getrandom(void *, u64, u32) hidden;