add statmount(2) syscall

Add a way to query attributes of a single mount instead of having to parse
the complete /proc/$PID/mountinfo, which might be huge.

Look up the mount by the new 64-bit mount ID.  If a mount needs to be queried
based on path, then statx(2) can be used to first query the mount ID
belonging to the path.

Design is based on a suggestion by Linus:

  "So I'd suggest something that is very much like "statfsat()", which gets
   a buffer and a length, and returns an extended "struct statfs" *AND*
   just a string description at the end."

The interface closely mimics that of statx.

Handle ASCII attributes by appending after the end of the structure (as per
above suggestion).  Offsets of the strings, relative to the start of the
string area, are stored in u32 members, so the structure is the same
regardless of pointer size.  Strings are nul terminated.

Link: https://lore.kernel.org/all/CAHk-=wh5YifP7hzKSbwJj94+DZ2czjrZsczy6GBimiogZws=rg@mail.gmail.com/
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Link: https://lore.kernel.org/r/20231025140205.3586473-5-mszeredi@redhat.com
Reviewed-by: Ian Kent <raven@themaw.net>
[Christian Brauner <brauner@kernel.org>: various minor changes]
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Miklos Szeredi 2023-10-25 16:02:02 +02:00 committed by Christian Brauner
parent 56c94c6267
commit 46eae99ef7
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
3 changed files with 339 additions and 0 deletions

View File

@ -4683,6 +4683,287 @@ int show_path(struct seq_file *m, struct dentry *root)
return 0;
}
/*
 * Find the mount whose unique ID is @id in namespace @ns.
 *
 * The candidate returned by mnt_find_id_at() is only accepted when its
 * unique ID matches @id exactly.  Returns the embedded vfsmount on a match
 * and NULL otherwise.
 */
static struct vfsmount *lookup_mnt_in_ns(u64 id, struct mnt_namespace *ns)
{
	struct mount *found = mnt_find_id_at(ns, id);

	if (found && found->mnt_id_unique == id)
		return &found->mnt;

	return NULL;
}
/*
 * Working state for one statmount(2) call.  The const members are fixed when
 * the structure is initialized by the syscall; the rest is scratch state
 * updated while the reply is assembled.
 */
struct kstatmount {
/* User buffer that receives the fixed struct followed by the strings */
struct statmount __user *const buf;
/* Size of @buf in bytes, as passed to the syscall */
size_t const bufsize;
/* Mount being queried */
struct vfsmount *const mnt;
/* STATMOUNT_* bits requested by the caller */
u64 const mask;
/* Scratch seq_file used to render one string at a time */
struct seq_file seq;
/* Filesystem root of the calling task */
struct path root;
/* Kernel copy of the fixed part of the reply */
struct statmount sm;
/* Bytes already written to buf->str; also the offset of the next string */
size_t pos;
/* First error hit by a helper; later helpers do nothing once it is set */
int err;
};
/*
 * Callback that produces one attribute, either by filling fields of s->sm or
 * by writing a string into s->seq.  Returns 0 on success or a negative errno.
 */
typedef int (*statmount_func_t)(struct kstatmount *);
/*
 * Make one attempt at rendering a string with @func into a newly allocated
 * seq buffer.
 *
 * The buffer size is capped at the space left in the user buffer after the
 * fixed struct and the strings emitted so far.  The caller must already have
 * checked that s->pos + sizeof(s->sm) < s->bufsize, otherwise @rem wraps.
 *
 * Returns 1 when the string fit, 0 when the attempt overflowed and should be
 * retried (seq->size has been doubled and the old buffer freed), or a
 * negative errno.  Except on the retry path, a successfully allocated
 * seq->buf is NOT freed here; it is left for the caller to release.
 */
static int statmount_string_seq(struct kstatmount *s, statmount_func_t func)
{
/* Space left for string data in the user buffer */
size_t rem = s->bufsize - s->pos - sizeof(s->sm);
struct seq_file *seq = &s->seq;
int ret;
seq->count = 0;
/* Never render more than could be copied out anyway */
seq->size = min(seq->size, rem);
seq->buf = kvmalloc(seq->size, GFP_KERNEL_ACCOUNT);
if (!seq->buf)
return -ENOMEM;
ret = func(s);
if (ret)
return ret;
if (seq_has_overflowed(seq)) {
/* Already at the user buffer limit: a bigger buffer cannot help */
if (seq->size == rem)
return -EOVERFLOW;
seq->size *= 2;
if (seq->size > MAX_RW_COUNT)
return -ENOMEM;
/* Retry with the doubled size; the next attempt allocates afresh */
kvfree(seq->buf);
return 0;
}
/* Done */
return 1;
}
/*
 * Emit one string attribute into the user buffer.
 *
 * @s:    statmount state
 * @mask: STATMOUNT_* bit identifying the attribute
 * @func: callback that renders the string into s->seq
 * @str:  field of s->sm that receives the offset of the string, relative to
 *        the start of the string area (buf->str)
 *
 * Does nothing if an earlier helper already failed or if @mask was not
 * requested.  On failure the error is recorded in s->err; -EOVERFLOW is used
 * when the user buffer has no room for the string.
 */
static void statmount_string(struct kstatmount *s, u64 mask, statmount_func_t func,
			     u32 *str)
{
	struct statmount *sm = &s->sm;
	struct seq_file *seq = &s->seq;
	int ret = 0;

	if (s->err || !(s->mask & mask))
		return;

	/*
	 * seq->buf may still hold the pointer freed while emitting a previous
	 * string.  Clear it so that the kvfree() below cannot free it a second
	 * time when the loop is skipped because the buffer is already full.
	 */
	seq->buf = NULL;

	/* No room at all after the fixed struct and the strings so far */
	if (s->pos + sizeof(s->sm) >= s->bufsize)
		ret = -EOVERFLOW;

	/* Start with one page; statmount_string_seq() grows it on retry */
	seq->size = PAGE_SIZE;
	while (!ret)
		ret = statmount_string_seq(s, func);

	if (ret < 0) {
		s->err = ret;
	} else {
		/* A non-overflowed seq always has room for the terminator */
		seq->buf[seq->count++] = '\0';
		if (copy_to_user(s->buf->str + s->pos, seq->buf, seq->count)) {
			s->err = -EFAULT;
		} else {
			*str = s->pos;
			s->pos += seq->count;
		}
	}

	kvfree(seq->buf);
	seq->buf = NULL;
	sm->mask |= mask;
}
/*
 * Fill in one group of fixed-size fields via @func.
 *
 * Skipped when a previous helper has failed or when @mask was not requested.
 * Otherwise the result of @func becomes s->err and @mask is flagged in the
 * reply.
 */
static void statmount_numeric(struct kstatmount *s, u64 mask, statmount_func_t func)
{
	if (s->err)
		return;
	if (!(s->mask & mask))
		return;

	s->err = func(s);
	s->sm.mask |= mask;
}
/*
 * Translate the MNT_* flags of @mnt into the MOUNT_ATTR_* representation.
 * The flags are sampled once with READ_ONCE().
 */
static u64 mnt_to_attr_flags(struct vfsmount *mnt)
{
	const unsigned int flags = READ_ONCE(mnt->mnt_flags);
	u64 attr;

	/* Exactly one atime mode is reported; noatime wins over relatime */
	if (flags & MNT_NOATIME)
		attr = MOUNT_ATTR_NOATIME;
	else if (flags & MNT_RELATIME)
		attr = MOUNT_ATTR_RELATIME;
	else
		attr = MOUNT_ATTR_STRICTATIME;

	/* Independent one-to-one flag translations */
	attr |= (flags & MNT_READONLY) ? MOUNT_ATTR_RDONLY : 0;
	attr |= (flags & MNT_NOSUID) ? MOUNT_ATTR_NOSUID : 0;
	attr |= (flags & MNT_NODEV) ? MOUNT_ATTR_NODEV : 0;
	attr |= (flags & MNT_NOEXEC) ? MOUNT_ATTR_NOEXEC : 0;
	attr |= (flags & MNT_NODIRATIME) ? MOUNT_ATTR_NODIRATIME : 0;
	attr |= (flags & MNT_NOSYMFOLLOW) ? MOUNT_ATTR_NOSYMFOLLOW : 0;

	if (is_idmapped_mnt(mnt))
		attr |= MOUNT_ATTR_IDMAP;

	return attr;
}
/*
 * Report the propagation type of @m as MS_* bits.  MS_PRIVATE is returned
 * when the mount is neither shared, slave nor unbindable.
 */
static u64 mnt_to_propagation_flags(struct mount *m)
{
	u64 flags = 0;

	flags |= IS_MNT_SHARED(m) ? MS_SHARED : 0;
	flags |= IS_MNT_SLAVE(m) ? MS_SLAVE : 0;
	flags |= IS_MNT_UNBINDABLE(m) ? MS_UNBINDABLE : 0;

	return flags ? flags : MS_PRIVATE;
}
static int statmount_sb_basic(struct kstatmount *s)
{
struct super_block *sb = s->mnt->mnt_sb;
s->sm.sb_dev_major = MAJOR(sb->s_dev);
s->sm.sb_dev_minor = MINOR(sb->s_dev);
s->sm.sb_magic = sb->s_magic;
s->sm.sb_flags = sb->s_flags & (SB_RDONLY|SB_SYNCHRONOUS|SB_DIRSYNC|SB_LAZYTIME);
return 0;
}
static int statmount_mnt_basic(struct kstatmount *s)
{
struct mount *m = real_mount(s->mnt);
s->sm.mnt_id = m->mnt_id_unique;
s->sm.mnt_parent_id = m->mnt_parent->mnt_id_unique;
s->sm.mnt_id_old = m->mnt_id;
s->sm.mnt_parent_id_old = m->mnt_parent->mnt_id;
s->sm.mnt_attr = mnt_to_attr_flags(&m->mnt);
s->sm.mnt_propagation = mnt_to_propagation_flags(m);
s->sm.mnt_peer_group = IS_MNT_SHARED(m) ? m->mnt_group_id : 0;
s->sm.mnt_master = IS_MNT_SLAVE(m) ? m->mnt_master->mnt_group_id : 0;
return 0;
}
/*
 * STATMOUNT_PROPAGATE_FROM: for a slave mount, record the dominating ID as
 * computed against the caller's current root.  The field is left untouched
 * for other mounts.  Always succeeds.
 */
static int statmount_propagate_from(struct kstatmount *s)
{
	struct mount *m = real_mount(s->mnt);

	if (IS_MNT_SLAVE(m))
		s->sm.propagate_from = get_dominating_id(m, &current->fs->root);

	return 0;
}
/*
 * STATMOUNT_MNT_ROOT: render the root of the mount into s->seq.
 *
 * show_path() output is octal-unescaped in place, so the raw path is
 * returned.  Nothing is unescaped when the seq buffer overflowed; the caller
 * detects that and retries.  Returns the result of show_path().
 */
static int statmount_mnt_root(struct kstatmount *s)
{
	struct seq_file *seq = &s->seq;
	int err;

	err = show_path(seq, s->mnt->mnt_root);
	if (err)
		return err;

	if (seq_has_overflowed(seq))
		return 0;

	/* Terminate so the in-place unescape sees a proper C string */
	seq->buf[seq->count] = '\0';
	seq->count = string_unescape_inplace(seq->buf, UNESCAPE_OCTAL);

	return 0;
}
/*
 * STATMOUNT_MNT_POINT: render the mountpoint relative to the caller's root
 * (s->root) into s->seq.  SEQ_SKIP from seq_path_root() is treated as
 * success; any other result is returned as is.
 */
static int statmount_mnt_point(struct kstatmount *s)
{
	struct path mnt_path = {
		.dentry = s->mnt->mnt_root,
		.mnt = s->mnt,
	};
	int err;

	err = seq_path_root(&s->seq, &mnt_path, &s->root, "");
	if (err == SEQ_SKIP)
		return 0;

	return err;
}
/*
 * STATMOUNT_FS_TYPE: write the name of the filesystem type into s->seq.
 * Always succeeds.
 */
static int statmount_fs_type(struct kstatmount *s)
{
	seq_puts(&s->seq, s->mnt->mnt_sb->s_type->name);

	return 0;
}
/*
 * Assemble the statmount reply for s->mnt and copy it to user space.
 *
 * Returns 0 on success, -EPERM if the mount is not reachable from the
 * caller's root and the caller lacks CAP_SYS_ADMIN, the error from
 * security_sb_statfs(), the first error recorded by a helper, or -EFAULT if
 * the fixed part cannot be copied out.
 */
static int do_statmount(struct kstatmount *s)
{
	struct mount *m = real_mount(s->mnt);
	struct statmount *sm = &s->sm;
	size_t fixed;
	int err;

	/*
	 * Don't trigger audit denials. We just want to determine what
	 * mounts to show users.
	 */
	if (!is_path_reachable(m, m->mnt.mnt_root, &s->root)) {
		if (!ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
			return -EPERM;
	}

	err = security_sb_statfs(s->mnt->mnt_root);
	if (err)
		return err;

	/* Fixed-size groups first; each helper is a no-op after an error */
	statmount_numeric(s, STATMOUNT_SB_BASIC, statmount_sb_basic);
	statmount_numeric(s, STATMOUNT_MNT_BASIC, statmount_mnt_basic);
	statmount_numeric(s, STATMOUNT_PROPAGATE_FROM, statmount_propagate_from);

	/* Strings are appended to the user buffer in this order */
	statmount_string(s, STATMOUNT_FS_TYPE, statmount_fs_type, &sm->fs_type);
	statmount_string(s, STATMOUNT_MNT_ROOT, statmount_mnt_root, &sm->mnt_root);
	statmount_string(s, STATMOUNT_MNT_POINT, statmount_mnt_point, &sm->mnt_point);

	if (s->err)
		return s->err;

	/* The fixed part is truncated if the user buffer is smaller than it */
	fixed = min_t(size_t, s->bufsize, sizeof(*sm));

	/* Return the number of bytes copied to the buffer */
	sm->size = fixed + s->pos;

	return copy_to_user(s->buf, sm, fixed) ? -EFAULT : 0;
}
/*
 * statmount(2): report attributes of the mount identified by req->mnt_id in
 * the caller's mount namespace.
 *
 * @req:     mount ID and the STATMOUNT_* mask of wanted attributes
 * @buf:     user buffer for struct statmount plus trailing strings
 * @bufsize: size of @buf in bytes
 * @flags:   must be zero
 *
 * Returns 0 on success, -EINVAL for non-zero @flags, -EFAULT if @req cannot
 * be read, -ENOENT if no such mount exists, or the error from do_statmount().
 */
SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
		struct statmount __user *, buf, size_t, bufsize,
		unsigned int, flags)
{
	struct mnt_id_req kreq;
	struct vfsmount *mnt;
	int ret = -ENOENT;

	if (flags)
		return -EINVAL;

	if (copy_from_user(&kreq, req, sizeof(kreq)))
		return -EFAULT;

	/* Lookup and reply assembly both run under namespace_sem held for read */
	down_read(&namespace_sem);

	mnt = lookup_mnt_in_ns(kreq.mnt_id, current->nsproxy->mnt_ns);
	if (mnt) {
		struct kstatmount s = {
			.buf = buf,
			.bufsize = bufsize,
			.mnt = mnt,
			.mask = kreq.request_mask,
		};

		/* Hold a reference on the caller's root for the duration */
		get_fs_root(current->fs, &s.root);
		ret = do_statmount(&s);
		path_put(&s.root);
	}

	up_read(&namespace_sem);

	return ret;
}
static void __init init_mount_tree(void)
{
struct vfsmount *mnt;

View File

@ -74,6 +74,8 @@ struct landlock_ruleset_attr;
enum landlock_rule_type;
struct cachestat_range;
struct cachestat;
struct statmount;
struct mnt_id_req;
#include <linux/types.h>
#include <linux/aio_abi.h>
@ -407,6 +409,9 @@ asmlinkage long sys_statfs64(const char __user *path, size_t sz,
asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user *buf);
asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz,
struct statfs64 __user *buf);
/* Query attributes of the single mount identified by req->mnt_id */
asmlinkage long sys_statmount(const struct mnt_id_req __user *req,
struct statmount __user *buf, size_t bufsize,
unsigned int flags);
asmlinkage long sys_truncate(const char __user *path, long length);
asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
#if BITS_PER_LONG == 32

View File

@ -138,4 +138,57 @@ struct mount_attr {
/* List of all mount_attr versions. */
#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */
/*
 * Structure for getting mount/superblock/filesystem info with statmount(2).
 *
 * The interface is similar to statx(2): individual fields or groups can be
 * selected with the request_mask field of struct mnt_id_req passed to
 * statmount(). Kernel will set the @mask field according to the fields it
 * filled in.
 *
 * If string fields are selected, then the caller needs to pass a buffer that
 * has space after the fixed part of the structure. Nul terminated strings are
 * copied there and offsets relative to @str are stored in the relevant fields
 * (marked [str] below). If the buffer is too small, then EOVERFLOW is
 * returned. The actually used size is returned in @size.
 */
struct statmount {
__u32 size; /* Total size, including strings */
__u32 __spare1;
__u64 mask; /* What results were written */
__u32 sb_dev_major; /* Device ID */
__u32 sb_dev_minor;
__u64 sb_magic; /* ..._SUPER_MAGIC */
__u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
__u32 fs_type; /* [str] Filesystem type */
__u64 mnt_id; /* Unique ID of mount */
__u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */
__u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */
__u32 mnt_parent_id_old;
__u64 mnt_attr; /* MOUNT_ATTR_... */
__u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
__u64 mnt_peer_group; /* ID of shared peer group */
__u64 mnt_master; /* Mount receives propagation from this ID */
__u64 propagate_from; /* Propagation from in current namespace */
__u32 mnt_root; /* [str] Root of mount relative to root of fs */
__u32 mnt_point; /* [str] Mountpoint relative to current root */
__u64 __spare2[50];
char str[]; /* Variable size part containing strings */
};
/*
 * Request argument of statmount(2): which mount to query and which
 * attributes are wanted.
 */
struct mnt_id_req {
/* Unique 64-bit ID of the mount to query */
__u64 mnt_id;
/* STATMOUNT_* bits selecting the attributes to return */
__u64 request_mask;
};
/*
 * @mask bits for statmount(2)
 *
 * Passed in mnt_id_req.request_mask to select attributes; the kernel reports
 * the attributes it filled in using the same bits in statmount.mask.
 */
#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */
#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */
#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */
#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
#endif /* _UAPI_LINUX_MOUNT_H */