linux-stable/include/uapi/linux/mount.h
Pavel Tikhomirov 9ffb14ef61
move_mount: allow to add a mount into an existing group
Previously a sharing group (shared and master ids pair) can be only
inherited when mount is created via bindmount. This patch adds an
ability to add an existing private mount into an existing sharing group.

With this functionality one can first create the desired mount tree from
only private mounts (without the need to care about undesired mount
propagation or mount creation order implied by sharing group
dependencies), and next then setup any desired mount sharing between
those mounts in tree as needed.

This allows CRIU to restore any set of mount namespaces, mount trees and
sharing group trees for a container.

We have many issues with restoring mounts in CRIU related to sharing
groups and propagation:
- reverse sharing groups vs mount tree order requires complex mounts
  reordering which mostly implies also using some temporary mounts
(please see https://lkml.org/lkml/2021/3/23/569 for more info)

- mount() syscall creates tons of mounts due to propagation
- mount re-parenting due to propagation
- "Mount Trap" due to propagation
- "Non Uniform" propagation, meaning that with different tricks with
  mount order and temporary children-"lock" mounts one can create mount
  trees which can't be restored without those tricks
(see https://www.linuxplumbersconf.org/event/7/contributions/640/)

With this new functionality we can resolve all the problems with
propagation at once.

Link: https://lore.kernel.org/r/20210715100714.120228-1-ptikhomirov@virtuozzo.com
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Mattias Nissler <mnissler@chromium.org>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-api@vger.kernel.org
Cc: lkml <linux-kernel@vger.kernel.org>
Co-developed-by: Andrei Vagin <avagin@gmail.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
2021-07-26 14:45:18 +02:00

139 lines
5 KiB
C

#ifndef _UAPI_LINUX_MOUNT_H
#define _UAPI_LINUX_MOUNT_H
#include <linux/types.h>
/*
* These are the fs-independent mount-flags: up to 32 flags are supported
*
* Usage of these is restricted within the kernel to core mount(2) code and
* callers of sys_mount() only. Filesystems should be using the SB_*
* equivalent instead.
*/
#define MS_RDONLY 1 /* Mount read-only */
#define MS_NOSUID 2 /* Ignore suid and sgid bits */
#define MS_NODEV 4 /* Disallow access to device special files */
#define MS_NOEXEC 8 /* Disallow program execution */
#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
#define MS_REMOUNT 32 /* Alter flags of a mounted FS */
#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */
#define MS_NOATIME 1024 /* Do not update access times. */
#define MS_NODIRATIME 2048 /* Do not update directory access times */
#define MS_BIND 4096
#define MS_MOVE 8192
#define MS_REC 16384
#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence.
MS_VERBOSE is deprecated. */
#define MS_SILENT 32768
#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */
#define MS_UNBINDABLE (1<<17) /* change to unbindable */
#define MS_PRIVATE (1<<18) /* change to private */
#define MS_SLAVE (1<<19) /* change to slave */
#define MS_SHARED (1<<20) /* change to shared */
#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
#define MS_I_VERSION (1<<23) /* Update inode I_version field */
#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
/* These sb flags are internal to the kernel */
#define MS_SUBMOUNT (1<<26)
#define MS_NOREMOTELOCK (1<<27)
#define MS_NOSEC (1<<28)
#define MS_BORN (1<<29)
#define MS_ACTIVE (1<<30)
#define MS_NOUSER (1<<31)
/*
* Superblock flags that can be altered by MS_REMOUNT
*/
#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
MS_LAZYTIME)
/*
* Old magic mount flag and mask
*/
#define MS_MGC_VAL 0xC0ED0000
#define MS_MGC_MSK 0xffff0000
/*
* open_tree() flags.
*/
#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */
#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */
/*
* move_mount() flags.
*/
#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */
#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */
#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */
#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
#define MOVE_MOUNT__MASK 0x00000177
/*
* fsopen() flags.
*/
#define FSOPEN_CLOEXEC 0x00000001
/*
* fspick() flags.
*/
#define FSPICK_CLOEXEC 0x00000001
#define FSPICK_SYMLINK_NOFOLLOW 0x00000002
#define FSPICK_NO_AUTOMOUNT 0x00000004
#define FSPICK_EMPTY_PATH 0x00000008
/*
* The type of fsconfig() call made.
*/
enum fsconfig_command {
FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */
FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */
FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */
FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */
FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */
FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */
FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */
FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
};
/*
* fsmount() flags.
*/
#define FSMOUNT_CLOEXEC 0x00000001
/*
* Mount attributes.
*/
#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */
#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */
#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */
#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */
#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */
#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */
#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */
#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */
#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */
/*
* mount_setattr()
*/
struct mount_attr {
__u64 attr_set;
__u64 attr_clr;
__u64 propagation;
__u64 userns_fd;
};
/* List of all mount_attr versions. */
#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */
#endif /* _UAPI_LINUX_MOUNT_H */