linux-stable/include/linux/seccomp.h
Sargun Dhillon 7cf97b1254 seccomp: Introduce addfd ioctl to seccomp user notifier
The current SECCOMP_RET_USER_NOTIF API allows for syscall supervision over
an fd. It is often used in settings where a supervising task emulates
syscalls on behalf of a supervised task in userspace, either to further
restrict the supervisee's syscall abilities or to circumvent kernel
enforced restrictions the supervisor deems safe to lift (e.g. actually
performing a mount(2) for an unprivileged container).

While SECCOMP_RET_USER_NOTIF allows for the interception of any syscall,
only a certain subset of syscalls could be correctly emulated. Over the
last few development cycles, the set of syscalls which can't be emulated
has been reduced due to the addition of pidfd_getfd(2). With this we are
now able to, for example, intercept syscalls that require the supervisor
to operate on file descriptors of the supervisee such as connect(2).

However, syscalls that cause new file descriptors to be installed can not
currently be correctly emulated since there is no way for the supervisor
to inject file descriptors into the supervisee. This patch adds a
new addfd ioctl to remove this restriction by allowing the supervisor to
install file descriptors into the intercepted task. By implementing this
feature via seccomp the supervisor effectively instructs the supervisee
to install a set of file descriptors into its own file descriptor table
during the intercepted syscall. This way it is possible to intercept
syscalls such as open() or accept(), and install (or replace, like
dup2(2)) the supervisor's resulting fd into the supervisee. One
replacement use-case would be to redirect the stdout and stderr of a
supervisee into log file descriptors opened by the supervisor.

The ioctl handling is based on the discussions[1] of how Extensible
Arguments should interact with ioctls. Instead of building size into
the addfd structure, make it a function of the ioctl command (which
is how sizes are normally passed to ioctls). To support forward and
backward compatibility, just mask out the direction and size, and match
everything. The size (and any future direction) checks are done along
with copy_struct_from_user() logic.

As a note, the seccomp_notif_addfd structure is laid out based on 8-byte
alignment without requiring packing as there have been packing issues
with uapi highlighted before[2][3]. Although we could overload the
newfd field and use -1 to indicate that it is not to be used, doing
so requires changing the size of the fd field, and introduces struct
packing complexity.

[1]: https://lore.kernel.org/lkml/87o8w9bcaf.fsf@mid.deneb.enyo.de/
[2]: https://lore.kernel.org/lkml/a328b91d-fd8f-4f27-b3c2-91a9c45f18c0@rasmusvillemoes.dk/
[3]: https://lore.kernel.org/lkml/20200612104629.GA15814@ircssh-2.c.rugged-nimbus-611.internal

Cc: Christoph Hellwig <hch@lst.de>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Tycho Andersen <tycho@tycho.ws>
Cc: Jann Horn <jannh@google.com>
Cc: Robert Sesek <rsesek@google.com>
Cc: Chris Palmer <palmer@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-api@vger.kernel.org
Suggested-by: Matt Denton <mpdenton@google.com>
Link: https://lore.kernel.org/r/20200603011044.7972-4-sargun@sargun.me
Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Reviewed-by: Will Drewry <wad@chromium.org>
Co-developed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
2020-07-14 16:29:42 -07:00

122 lines
3.2 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SECCOMP_H
#define _LINUX_SECCOMP_H
#include <uapi/linux/seccomp.h>
#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \
SECCOMP_FILTER_FLAG_LOG | \
SECCOMP_FILTER_FLAG_SPEC_ALLOW | \
SECCOMP_FILTER_FLAG_NEW_LISTENER | \
SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
/* sizeof() the first published struct seccomp_notif_addfd */
#define SECCOMP_NOTIFY_ADDFD_SIZE_VER0 24
#define SECCOMP_NOTIFY_ADDFD_SIZE_LATEST SECCOMP_NOTIFY_ADDFD_SIZE_VER0
#ifdef CONFIG_SECCOMP
#include <linux/thread_info.h>
#include <linux/atomic.h>
#include <asm/seccomp.h>
struct seccomp_filter;
/**
* struct seccomp - the state of a seccomp'ed process
*
* @mode: indicates one of the valid values above for controlled
* system calls available to a process.
* @filter: must always point to a valid seccomp-filter or NULL as it is
* accessed without locking during system call entry.
*
* @filter must only be accessed from the context of current as there
* is no read locking.
*/
struct seccomp {
int mode;
atomic_t filter_count;
struct seccomp_filter *filter;
};
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
extern int __secure_computing(const struct seccomp_data *sd);
static inline int secure_computing(void)
{
if (unlikely(test_thread_flag(TIF_SECCOMP)))
return __secure_computing(NULL);
return 0;
}
#else
extern void secure_computing_strict(int this_syscall);
#endif
extern long prctl_get_seccomp(void);
extern long prctl_set_seccomp(unsigned long, void __user *);
static inline int seccomp_mode(struct seccomp *s)
{
return s->mode;
}
#else /* CONFIG_SECCOMP */
#include <linux/errno.h>
struct seccomp { };
struct seccomp_filter { };
#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
static inline int secure_computing(void) { return 0; }
#else
static inline void secure_computing_strict(int this_syscall) { return; }
#endif
static inline long prctl_get_seccomp(void)
{
return -EINVAL;
}
static inline long prctl_set_seccomp(unsigned long arg2, char __user *arg3)
{
return -EINVAL;
}
static inline int seccomp_mode(struct seccomp *s)
{
return SECCOMP_MODE_DISABLED;
}
#endif /* CONFIG_SECCOMP */
#ifdef CONFIG_SECCOMP_FILTER
extern void seccomp_filter_release(struct task_struct *tsk);
extern void get_seccomp_filter(struct task_struct *tsk);
#else /* CONFIG_SECCOMP_FILTER */
static inline void seccomp_filter_release(struct task_struct *tsk)
{
return;
}
static inline void get_seccomp_filter(struct task_struct *tsk)
{
return;
}
#endif /* CONFIG_SECCOMP_FILTER */
#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
extern long seccomp_get_filter(struct task_struct *task,
unsigned long filter_off, void __user *data);
extern long seccomp_get_metadata(struct task_struct *task,
unsigned long filter_off, void __user *data);
#else
static inline long seccomp_get_filter(struct task_struct *task,
unsigned long n, void __user *data)
{
return -EINVAL;
}
static inline long seccomp_get_metadata(struct task_struct *task,
unsigned long filter_off,
void __user *data)
{
return -EINVAL;
}
#endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */
#endif /* _LINUX_SECCOMP_H */