unveil: Added truncate support on Linux 6.2+ (#803)

Right now, Cosmopolitan uses Landlock ABI version 2 on Linux,
meaning that the polyfill for unveil() cannot restrict operations such
as truncate() (a limitation of the Landlock ABI at that version). This
means that to restrict truncation operations Cosmopolitan instead has to
ban the syscall through a SECCOMP BPF filter, meaning that completely
legitimate truncate() calls are blocked.

However, the newest version of the Landlock ABI (version 3), introduced
in Linux 6.2 (released in February 2023), implements support for
controlling truncation operations. As such, the previous SECCOMP BPF
truncate() filtering is no longer needed when the new ABI is available.

This patch implements unveil truncate support for Linux Landlock ABI
version 3.
This commit is contained in:
Gabriel Ravier 2023-04-18 01:17:02 +02:00 committed by GitHub
parent f7bccf5513
commit d5b8b644c2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 61 additions and 18 deletions

View file

@ -26,6 +26,15 @@
*/
#define LANDLOCK_ACCESS_FS_REFER 0x2000ul
/**
* Control file truncation.
*
* @see https://lore.kernel.org/all/20221018182216.301684-1-gnoack3000@gmail.com/
* @see https://docs.kernel.org/userspace-api/landlock.html
* @note ABI 3+
*/
#define LANDLOCK_ACCESS_FS_TRUNCATE 0x4000ul
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_

View file

@ -53,7 +53,7 @@
#define UNVEIL_READ \
(LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_READ_DIR | \
LANDLOCK_ACCESS_FS_REFER)
#define UNVEIL_WRITE (LANDLOCK_ACCESS_FS_WRITE_FILE)
#define UNVEIL_WRITE (LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE)
#define UNVEIL_EXEC (LANDLOCK_ACCESS_FS_EXECUTE)
#define UNVEIL_CREATE \
(LANDLOCK_ACCESS_FS_MAKE_CHAR | LANDLOCK_ACCESS_FS_MAKE_DIR | \
@ -65,7 +65,7 @@
(LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_WRITE_FILE | \
LANDLOCK_ACCESS_FS_EXECUTE)
static const struct sock_filter kUnveilBlacklist[] = {
static const struct sock_filter kUnveilBlacklistAbiVersionBelow3[] = {
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(arch)),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
@ -76,13 +76,29 @@ static const struct sock_filter kUnveilBlacklist[] = {
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
};
// Seccomp BPF program used by unveil_final() unless the detected Landlock
// ABI version is below 3. Unlike the pre-v3 table it has no truncate()
// entry; the only syscall it still denies is setxattr().
//
// The jump offsets below are relative instruction counts, so the order of
// the entries must not be changed.
// NOTE(review): OFF() is defined outside this view; presumably it yields
// the offset of a field in struct seccomp_data -- confirm.
static const struct sock_filter kUnveilBlacklistLatestAbi[] = {
// load the architecture word of the syscall being filtered
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(arch)),
// x86-64: skip over the kill below; anything else falls through to it
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, AUDIT_ARCH_X86_64, 1, 0),
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
// load the syscall number
BPF_STMT(BPF_LD | BPF_W | BPF_ABS, OFF(nr)),
// setxattr: fall through to the errno return; otherwise skip to allow
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_linux_setxattr, 0, 1),
// fail the call with errno value 1 (EPERM on Linux)
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ERRNO | (1 & SECCOMP_RET_DATA)),
// every other syscall is permitted by this filter
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
};
// Result of the Landlock ABI version probe made at program startup.
// unveil_init() treats -1 as "probe failed" and unveil_final() uses the
// value to pick which seccomp filter to install.
static int landlock_abi_version;

/**
 * Probes the kernel's Landlock ABI version once, before main() runs.
 *
 * The raw return value of landlock_create_ruleset() is stored as-is in
 * `landlock_abi_version`.
 *
 * NOTE(review): the errno left behind by a failing probe is not saved
 * here, yet unveil_init() inspects errno when the stored version is -1.
 * Any call that touches errno between startup and unveil() makes that
 * check unreliable -- consider recording errno alongside the version.
 */
__attribute__((__constructor__)) void init_landlock_version(void) {
  landlock_abi_version =
      landlock_create_ruleset(0, 0, LANDLOCK_CREATE_RULESET_VERSION);
}
/**
* Long living state for landlock calls.
* fs_mask is set to use all the access rights from the latest landlock ABI.
* On init, the current supported abi is checked and unavailable rights are
* masked off.
*
* As of 5.19, the latest abi is v2.
* As of 6.2, the latest abi is v3.
*
* TODO:
* - Integrate with pledge and remove the file access?
@ -96,9 +112,15 @@ _Thread_local static struct {
static int unveil_final(void) {
int e, rc;
struct sock_fprog sandbox = {
.filter = kUnveilBlacklist,
.len = ARRAYLEN(kUnveilBlacklist),
.filter = kUnveilBlacklistLatestAbi,
.len = ARRAYLEN(kUnveilBlacklistLatestAbi),
};
if (landlock_abi_version < 3) {
sandbox = (struct sock_fprog){
.filter = kUnveilBlacklistAbiVersionBelow3,
.len = ARRAYLEN(kUnveilBlacklistAbiVersionBelow3),
};
}
e = errno;
prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
errno = e;
@ -120,16 +142,18 @@ static int err_close(int rc, int fd) {
static int unveil_init(void) {
int rc, fd;
State.fs_mask = UNVEIL_READ | UNVEIL_WRITE | UNVEIL_EXEC | UNVEIL_CREATE;
if ((rc = landlock_create_ruleset(0, 0, LANDLOCK_CREATE_RULESET_VERSION)) ==
-1) {
if (landlock_abi_version == -1) {
if (errno == EOPNOTSUPP) {
errno = ENOSYS;
}
return -1;
}
if (rc < 2) {
if (landlock_abi_version < 2) {
State.fs_mask &= ~LANDLOCK_ACCESS_FS_REFER;
}
if (landlock_abi_version < 3) {
State.fs_mask &= ~LANDLOCK_ACCESS_FS_TRUNCATE;
}
const struct landlock_ruleset_attr attr = {
.handled_access_fs = State.fs_mask,
};
@ -301,14 +325,19 @@ int sys_unveil_linux(const char *path, const char *permissions) {
* possible to use opendir() and go fishing for paths which weren't
* previously known.
*
* 5. Use ftruncate() rather than truncate(). One issue Landlock hasn't
* addressed yet is restrictions over truncate() and setxattr() which
* could permit certain kinds of modifications to files outside the
* sandbox. When your policy is committed, we install a SECCOMP BPF
* filter to disable those calls, however similar trickery may be
* possible through other unaddressed calls like ioctl(). Using the
* pledge() function in addition to unveil() will solve this, since
* it installs a strong system call access policy.
* 5. Use ftruncate() rather than truncate() if you wish for portability to
* Linux kernel versions released before February 2023. One issue
* Landlock hadn't addressed as of ABI version 2 was restrictions over
* truncate() and setxattr() which could permit certain kinds of
* modifications to files outside the sandbox. When your policy is
* committed, we install a SECCOMP BPF filter to disable those calls,
* however similar trickery may be possible through other unaddressed
* calls like ioctl(). Using the pledge() function in addition to
* unveil() will solve this, since it installs a strong system call
* access policy. Linux 6.2 has improved this situation with Landlock
* ABI v3, which added the ability to control truncation operations;
* this means the SECCOMP BPF filter will only disable truncate() on
* Linux 6.1 or older.
*
* 6. Set your process-wide policy at startup from the main thread. On
* OpenBSD unveil() will apply process-wide even when called from a
@ -347,7 +376,8 @@ int sys_unveil_linux(const char *path, const char *permissions) {
* @raise EINVAL if one argument is set and the other is not
* @raise EINVAL if an invalid character in `permissions` was found
* @raise EPERM if unveil() is called after locking
* @note on Linux this function requires Linux Kernel 5.13+
* @note on Linux this function requires Linux Kernel 5.13+, and Linux
* Kernel 6.2+ to properly support truncation operations
* @see [1] https://docs.kernel.org/userspace-api/landlock.html
* @threadsafe
*/

View file

@ -68,6 +68,10 @@ void SetUp(void) {
ASSERT_SYS(0, 0, stat("/zip/life.elf", &st));
}
// Tells the tests whether unveil() itself can police truncate(): always
// the case on OpenBSD, otherwise only when the kernel reports Landlock
// ABI version 3 or newer.
bool HasTruncateSupport(void) {
  if (IsOpenbsd()) {
    return true;
  }
  int abi = landlock_create_ruleset(0, 0, LANDLOCK_CREATE_RULESET_VERSION);
  return abi >= 3;
}
TEST(unveil, api_differences) {
SPAWN(fork);
ASSERT_SYS(0, 0, mkdir("foo", 0755));
@ -245,7 +249,7 @@ TEST(unveil, truncate_isForbiddenBySeccomp) {
ASSERT_SYS(0, 0, xbarf("garden/secret.txt", "hello", 5));
ASSERT_SYS(0, 0, unveil("jail", "rw"));
ASSERT_SYS(0, 0, unveil(0, 0));
ASSERT_SYS(IsOpenbsd() ? ENOENT : EPERM, -1,
ASSERT_SYS(!HasTruncateSupport() ? EPERM : EACCES_OR_ENOENT, -1,
truncate("garden/secret.txt", 0));
if (IsLinux()) {
ASSERT_SYS(0, 0, stat("garden/secret.txt", &st));