seccomp updates for v5.14-rc1

Add "atomic addfd + send reply" mode to SECCOMP_USER_NOTIF to better
 handle EINTR races visible to seccomp monitors. (Rodrigo Campos,
 Sargun Dhillon)
 
 Improve seccomp selftests for readability in CI systems. (Kees Cook)
 -----BEGIN PGP SIGNATURE-----
 
 iQJKBAABCgA0FiEEpcP2jyKd1g9yPm4TiXL039xtwCYFAmDaKLwWHGtlZXNjb29r
 QGNocm9taXVtLm9yZwAKCRCJcvTf3G3AJjwYD/wKVvQw9NBt+0Beo1lUvRmSDL6y
 zD1dg0ACiUc3O8kLszT6YtSiNFLSA4AlYI40puq/fs8BrP5ssvoUdlmkge88p1ph
 iLFBWXPP7ZG8mIdul35Cl0Z4r0T8NfDm9A5MoGGx9zfkWOhz9aiUKvR5EGHhX2K2
 DMsCkG2JtVmoUfKLUIHtOtDf90LdwDXT4g/etgevh/xAEcwb48wx0fHrnUuXeHWS
 Myor2w/RBs7XxMjizfhwxuUqDR6ZznWPbpSvfXoqJF7unfsXq1kUKG47POIUSHYb
 mC6MtAZ8Z3V4kF/PVb2JqpFJOf6mEKsNVeDbNX25PtRCpd+ypRN+qD7k6e9OT+yc
 Jx42ontBQIOS3IYc2ahZ9R4UcC1SVMySFPol/DxnsW5c49X5CMLHeAjFY+25/H6d
 XvBOD+W4tQMqHLZroGiqcA+db672lE23DOsbNDSxaJOwhtSPbIlxHxN+vaHuoN1D
 QJKhmkmcBuqtOQLaCPAsKqYwIftix6pmxLHyAw/EMalwHTJtZRvA9IoIUS0e1w68
 2tWH9RgSlIUZOvy5kRQ2wzth9yqnet4/a6rOMKRjLCpsTJQW48H+9zPHGzrcnT24
 HVjPG+OGq7+/uGPWoSBCh5fuJ1UT9jhLGkDgfxS2BHTUYClchZFLPpD3t9nHaNUn
 mJweXo0F19fKyadWSw==
 =rOqS
 -----END PGP SIGNATURE-----

Merge tag 'seccomp-v5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull seccomp updates from Kees Cook:

 - Add "atomic addfd + send reply" mode to SECCOMP_USER_NOTIF to better
   handle EINTR races visible to seccomp monitors. (Rodrigo Campos,
   Sargun Dhillon)

 - Improve seccomp selftests for readability in CI systems. (Kees Cook)

* tag 'seccomp-v5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  selftests/seccomp: Avoid using "sysctl" for report
  selftests/seccomp: Flush benchmark output
  selftests/seccomp: More closely track fds being assigned
  selftests/seccomp: Add test for atomic addfd+send
  seccomp: Support atomic "addfd + send reply"
This commit is contained in:
Linus Torvalds 2021-06-28 19:49:37 -07:00
commit 616ea5cc4a
5 changed files with 113 additions and 12 deletions

View File

@ -259,6 +259,18 @@ and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be
returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should
be the same ``id`` as in ``struct seccomp_notif``.
Userspace can also add file descriptors to the notifying process via
``ioctl(SECCOMP_IOCTL_NOTIF_ADDFD)``. The ``id`` member of
``struct seccomp_notif_addfd`` should be the same ``id`` as in
``struct seccomp_notif``. The ``newfd_flags`` flag may be used to set flags
like O_EXEC on the file descriptor in the notifying process. If the supervisor
wants to inject the file descriptor with a specific number, the
``SECCOMP_ADDFD_FLAG_SETFD`` flag can be used, and set the ``newfd`` member to
the specific number to use. If that file descriptor is already open in the
notifying process it will be replaced. The supervisor can also add an FD, and
respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return
value will be the injected file descriptor number.
It is worth noting that ``struct seccomp_data`` contains the values of register
arguments to the syscall, but does not contain pointers to memory. The task's
memory is accessible to suitably privileged traces via ``ptrace()`` or

View File

@ -115,6 +115,7 @@ struct seccomp_notif_resp {
/* valid flags for seccomp_notif_addfd */
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
/**
* struct seccomp_notif_addfd

View File

@ -107,6 +107,7 @@ struct seccomp_knotif {
* installing process should allocate the fd as normal.
* @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
* is allowed.
* @ioctl_flags: The flags used for the seccomp_addfd ioctl.
* @ret: The return value of the installing process. It is set to the fd num
* upon success (>= 0).
* @completion: Indicates that the installing process has completed fd
@ -118,6 +119,7 @@ struct seccomp_kaddfd {
struct file *file;
int fd;
unsigned int flags;
__u32 ioctl_flags;
union {
bool setfd;
@ -1065,18 +1067,37 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
return filter->notif->next_id++;
}
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
{
int fd;
/*
* Remove the notification, and reset the list pointers, indicating
* that it has been handled.
*/
list_del_init(&addfd->list);
if (!addfd->setfd)
addfd->ret = receive_fd(addfd->file, addfd->flags);
fd = receive_fd(addfd->file, addfd->flags);
else
addfd->ret = receive_fd_replace(addfd->fd, addfd->file,
addfd->flags);
fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
addfd->ret = fd;
if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
/* If we fail reset and return an error to the notifier */
if (fd < 0) {
n->state = SECCOMP_NOTIFY_SENT;
} else {
/* Return the FD we just added */
n->flags = 0;
n->error = 0;
n->val = fd;
}
}
/*
* Mark the notification as completed. From this point, addfd mem
* might be invalidated and we can't safely read it anymore.
*/
complete(&addfd->completion);
}
@ -1120,7 +1141,7 @@ static int seccomp_do_user_notification(int this_syscall,
struct seccomp_kaddfd, list);
/* Check if we were woken up by a addfd message */
if (addfd)
seccomp_handle_addfd(addfd);
seccomp_handle_addfd(addfd, &n);
} while (n.state != SECCOMP_NOTIFY_REPLIED);
@ -1581,7 +1602,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
if (addfd.newfd_flags & ~O_CLOEXEC)
return -EINVAL;
if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD)
if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
return -EINVAL;
if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
@ -1591,6 +1612,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
if (!kaddfd.file)
return -EBADF;
kaddfd.ioctl_flags = addfd.flags;
kaddfd.flags = addfd.newfd_flags;
kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
kaddfd.fd = addfd.newfd;
@ -1616,6 +1638,23 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
goto out_unlock;
}
if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
/*
* Disallow queuing an atomic addfd + send reply while there are
* some addfd requests still to process.
*
* There is no clear reason to support it and allows us to keep
* the loop on the other side straight-forward.
*/
if (!list_empty(&knotif->addfd)) {
ret = -EBUSY;
goto out_unlock;
}
/* Allow exactly only one reply */
knotif->state = SECCOMP_NOTIFY_REPLIED;
}
list_add(&kaddfd.list, &knotif->addfd);
complete(&knotif->ready);
mutex_unlock(&filter->notify_lock);

View File

@ -143,9 +143,15 @@ int main(int argc, char *argv[])
unsigned long long native, filter1, filter2, bitmap1, bitmap2;
unsigned long long entry, per_filter1, per_filter2;
setbuf(stdout, NULL);
printf("Running on:\n");
system("uname -a");
printf("Current BPF sysctl settings:\n");
system("sysctl net.core.bpf_jit_enable");
system("sysctl net.core.bpf_jit_harden");
/* Avoid using "sysctl" which may not be installed. */
system("grep -H . /proc/sys/net/core/bpf_jit_enable");
system("grep -H . /proc/sys/net/core/bpf_jit_harden");
if (argc > 1)
samples = strtoull(argv[1], NULL, 0);

View File

@ -235,6 +235,10 @@ struct seccomp_notif_addfd {
};
#endif
#ifndef SECCOMP_ADDFD_FLAG_SEND
#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
#endif
struct seccomp_notif_addfd_small {
__u64 id;
char weird[4];
@ -3959,7 +3963,7 @@ TEST(user_notification_addfd)
{
pid_t pid;
long ret;
int status, listener, memfd, fd;
int status, listener, memfd, fd, nextfd;
struct seccomp_notif_addfd addfd = {};
struct seccomp_notif_addfd_small small = {};
struct seccomp_notif_addfd_big big = {};
@ -3968,25 +3972,34 @@ TEST(user_notification_addfd)
/* 100 ms */
struct timespec delay = { .tv_nsec = 100000000 };
/* There may be arbitrary already-open fds at test start. */
memfd = memfd_create("test", 0);
ASSERT_GE(memfd, 0);
nextfd = memfd + 1;
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
/* fd: 4 */
/* Check that the basic notification machinery works */
listener = user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
ASSERT_EQ(listener, nextfd++);
pid = fork();
ASSERT_GE(pid, 0);
if (pid == 0) {
/* fds will be added and this value is expected */
if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
exit(1);
/* Atomic addfd+send is received here. Check it is a valid fd */
if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
exit(1);
exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
}
@ -4028,14 +4041,14 @@ TEST(user_notification_addfd)
/* Verify we can set an arbitrary remote fd */
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
EXPECT_GE(fd, 0);
EXPECT_EQ(fd, nextfd++);
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/* Verify we can set an arbitrary remote fd with large size */
memset(&big, 0x0, sizeof(big));
big.addfd = addfd;
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
EXPECT_GE(fd, 0);
EXPECT_EQ(fd, nextfd++);
/* Verify we can set a specific remote fd */
addfd.newfd = 42;
@ -4065,6 +4078,32 @@ TEST(user_notification_addfd)
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
ASSERT_EQ(addfd.id, req.id);
/* Verify we can do an atomic addfd and send */
addfd.newfd = 0;
addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
/*
* Child has earlier "low" fds and now 42, so we expect the next
* lowest available fd to be assigned here.
*/
EXPECT_EQ(fd, nextfd++);
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/*
* This sets the ID of the ADD FD to the last request plus 1. The
* notification ID increments 1 per notification.
*/
addfd.id = req.id + 1;
/* This spins until the underlying notification is generated */
while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
errno != -EINPROGRESS)
nanosleep(&delay, NULL);
memset(&req, 0, sizeof(req));
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
ASSERT_EQ(addfd.id, req.id);
resp.id = req.id;
resp.error = 0;
resp.val = USER_NOTIF_MAGIC;
@ -4125,6 +4164,10 @@ TEST(user_notification_addfd_rlimit)
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
EXPECT_EQ(errno, EMFILE);
addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
EXPECT_EQ(errno, EMFILE);
addfd.newfd = 100;
addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);