overlayfs fixes for 5.11-rc7

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQSQHSd0lITzzeNWNm3h3BK/laaZPAUCYBuyTQAKCRDh3BK/laaZ
 PBBhAPwLy3ksQLhY7in4I8aKrSyWRpaCSAeLQUitxnX3eQiQnAD/S1EEIapwradV
 y4ou1PBRsGnhwNgArXODVCcTgqDJqw8=
 =GjU4
 -----END PGP SIGNATURE-----

Merge tag 'ovl-fixes-5.11-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs

Pull overlayfs fixes from Miklos Szeredi:

 - Fix capability conversion and minor overlayfs bugs that are related
   to the unprivileged overlay mounts introduced in this cycle.

 - Fix two recent (v5.10) and one old (v4.10) bug.

 - Clean up security xattr copy-up (related to a SELinux regression).

* tag 'ovl-fixes-5.11-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs:
  ovl: implement volatile-specific fsync error behaviour
  ovl: skip getxattr of security labels
  ovl: fix dentry leak in ovl_get_redirect
  ovl: avoid deadlock on directory ioctl
  cap: fix conversions on getxattr
  ovl: perform vfs_getxattr() with mounter creds
  ovl: add warning on user_ns mismatch
This commit is contained in:
Linus Torvalds 2021-02-04 10:01:17 -08:00
commit 4cb2c00c43
11 changed files with 136 additions and 59 deletions

View file

@ -586,6 +586,14 @@ without significant effort.
The advantage of mounting with the "volatile" option is that all forms of
sync calls to the upper filesystem are omitted.
In order to avoid giving a false sense of safety, the syncfs (and fsync)
semantics of volatile mounts are slightly different than that of the rest of
VFS. If any writeback error occurs on the upperdir's filesystem after a
volatile mount takes place, all sync functions will return an error. Once this
condition is reached, the filesystem will not recover, and every subsequent sync
call will return an error, even if the upperdir has not experienced a new error
since the last sync call.
When overlay is mounted with "volatile" option, the directory
"$workdir/work/incompat/volatile" is created. During next mount, overlay
checks for this directory and refuses to mount if present. This is a strong

View file

@ -84,6 +84,14 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
if (ovl_is_private_xattr(sb, name))
continue;
error = security_inode_copy_up_xattr(name);
if (error < 0 && error != -EOPNOTSUPP)
break;
if (error == 1) {
error = 0;
continue; /* Discard */
}
retry:
size = vfs_getxattr(old, name, value, value_size);
if (size == -ERANGE)
@ -107,13 +115,6 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
goto retry;
}
error = security_inode_copy_up_xattr(name);
if (error < 0 && error != -EOPNOTSUPP)
break;
if (error == 1) {
error = 0;
continue; /* Discard */
}
error = vfs_setxattr(new, name, value, size, 0);
if (error) {
if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))

View file

@ -992,8 +992,8 @@ static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
buflen -= thislen;
memcpy(&buf[buflen], name, thislen);
tmp = dget_dlock(d->d_parent);
spin_unlock(&d->d_lock);
tmp = dget_parent(d);
dput(d);
d = tmp;

View file

@ -398,8 +398,9 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
const struct cred *old_cred;
int ret;
if (!ovl_should_sync(OVL_FS(file_inode(file)->i_sb)))
return 0;
ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
if (ret <= 0)
return ret;
ret = ovl_real_fdget_meta(file, &real, !datasync);
if (ret)

View file

@ -352,7 +352,9 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
goto out;
if (!value && !upperdentry) {
old_cred = ovl_override_creds(dentry->d_sb);
err = vfs_getxattr(realdentry, name, NULL, 0);
revert_creds(old_cred);
if (err < 0)
goto out_drop_write;
}

View file

@ -324,6 +324,7 @@ int ovl_check_metacopy_xattr(struct ovl_fs *ofs, struct dentry *dentry);
bool ovl_is_metacopy_dentry(struct dentry *dentry);
char *ovl_get_redirect_xattr(struct ovl_fs *ofs, struct dentry *dentry,
int padding);
int ovl_sync_status(struct ovl_fs *ofs);
static inline bool ovl_is_impuredir(struct super_block *sb,
struct dentry *dentry)

View file

@ -81,6 +81,8 @@ struct ovl_fs {
atomic_long_t last_ino;
/* Whiteout dentry cache */
struct dentry *whiteout;
/* r/o snapshot of upperdir sb's only taken on volatile mounts */
errseq_t errseq;
};
static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)

View file

@ -865,7 +865,7 @@ struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
struct ovl_dir_file *od = file->private_data;
struct dentry *dentry = file->f_path.dentry;
struct file *realfile = od->realfile;
struct file *old, *realfile = od->realfile;
if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
return want_upper ? NULL : realfile;
@ -874,29 +874,20 @@ struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
* Need to check if we started out being a lower dir, but got copied up
*/
if (!od->is_upper) {
struct inode *inode = file_inode(file);
realfile = READ_ONCE(od->upperfile);
if (!realfile) {
struct path upperpath;
ovl_path_upper(dentry, &upperpath);
realfile = ovl_dir_open_realfile(file, &upperpath);
if (IS_ERR(realfile))
return realfile;
inode_lock(inode);
if (!od->upperfile) {
if (IS_ERR(realfile)) {
inode_unlock(inode);
return realfile;
}
smp_store_release(&od->upperfile, realfile);
} else {
/* somebody has beaten us to it */
if (!IS_ERR(realfile))
fput(realfile);
realfile = od->upperfile;
old = cmpxchg_release(&od->upperfile, NULL, realfile);
if (old) {
fput(realfile);
realfile = old;
}
inode_unlock(inode);
}
}
@ -909,8 +900,9 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
struct file *realfile;
int err;
if (!ovl_should_sync(OVL_FS(file->f_path.dentry->d_sb)))
return 0;
err = ovl_sync_status(OVL_FS(file->f_path.dentry->d_sb));
if (err <= 0)
return err;
realfile = ovl_dir_real_file(file, true);
err = PTR_ERR_OR_ZERO(realfile);

View file

@ -264,11 +264,20 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
struct super_block *upper_sb;
int ret;
if (!ovl_upper_mnt(ofs))
return 0;
ret = ovl_sync_status(ofs);
/*
* We have to always set the err, because the return value isn't
* checked in syncfs, and instead indirectly return an error via
* the sb's writeback errseq, which VFS inspects after this call.
*/
if (ret < 0) {
errseq_set(&sb->s_wb_err, -EIO);
return -EIO;
}
if (!ret)
return ret;
if (!ovl_should_sync(ofs))
return 0;
/*
* Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
* All the super blocks will be iterated, including upper_sb.
@ -1923,6 +1932,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
unsigned int numlower;
int err;
err = -EIO;
if (WARN_ON(sb->s_user_ns != current_user_ns()))
goto out;
sb->s_d_op = &ovl_dentry_operations;
err = -ENOMEM;
@ -1989,6 +2002,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &ovl_super_operations;
if (ofs->config.upperdir) {
struct super_block *upper_sb;
if (!ofs->config.workdir) {
pr_err("missing 'workdir'\n");
goto out_err;
@ -1998,6 +2013,16 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (err)
goto out_err;
upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
if (!ovl_should_sync(ofs)) {
ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
err = -EIO;
pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
goto out_err;
}
}
err = ovl_get_workdir(sb, ofs, &upperpath);
if (err)
goto out_err;
@ -2005,9 +2030,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (!ofs->workdir)
sb->s_flags |= SB_RDONLY;
sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth;
sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
sb->s_stack_depth = upper_sb->s_stack_depth;
sb->s_time_gran = upper_sb->s_time_gran;
}
oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
err = PTR_ERR(oe);

View file

@ -962,3 +962,30 @@ char *ovl_get_redirect_xattr(struct ovl_fs *ofs, struct dentry *dentry,
kfree(buf);
return ERR_PTR(res);
}
/*
 * ovl_sync_status() - Report whether a sync must be done, may be skipped,
 * or has to fail for this overlay instance.
 *
 * Return values:
 *   1       - ovl_should_sync() is true, i.e. the mount is not volatile and
 *             the caller has to carry out a real sync.
 *   0       - the mount is volatile and either there is no upper mount or
 *             errseq_check() reports nothing new on the upper superblock's
 *             s_wb_err relative to the value saved in ofs->errseq, so the
 *             sync can be skipped.
 *   -errno  - the mount is volatile and errseq_check() reports an error
 *             relative to ofs->errseq; whatever errseq_check() returns is
 *             passed through unchanged.
 */
int ovl_sync_status(struct ovl_fs *ofs)
{
	struct vfsmount *upper_mnt;

	/* Non-volatile mounts always need the real sync. */
	if (ovl_should_sync(ofs))
		return 1;

	upper_mnt = ovl_upper_mnt(ofs);
	if (upper_mnt)
		return errseq_check(&upper_mnt->mnt_sb->s_wb_err, ofs->errseq);

	/* No upper layer: nothing to check, nothing to sync. */
	return 0;
}

View file

@ -371,10 +371,11 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
{
int size, ret;
kuid_t kroot;
u32 nsmagic, magic;
uid_t root, mappedroot;
char *tmpbuf = NULL;
struct vfs_cap_data *cap;
struct vfs_ns_cap_data *nscap;
struct vfs_ns_cap_data *nscap = NULL;
struct dentry *dentry;
struct user_namespace *fs_ns;
@ -396,46 +397,61 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
fs_ns = inode->i_sb->s_user_ns;
cap = (struct vfs_cap_data *) tmpbuf;
if (is_v2header((size_t) ret, cap)) {
/* If this is sizeof(vfs_cap_data) then we're ok with the
* on-disk value, so return that. */
if (alloc)
*buffer = tmpbuf;
else
kfree(tmpbuf);
return ret;
} else if (!is_v3header((size_t) ret, cap)) {
kfree(tmpbuf);
return -EINVAL;
root = 0;
} else if (is_v3header((size_t) ret, cap)) {
nscap = (struct vfs_ns_cap_data *) tmpbuf;
root = le32_to_cpu(nscap->rootid);
} else {
size = -EINVAL;
goto out_free;
}
nscap = (struct vfs_ns_cap_data *) tmpbuf;
root = le32_to_cpu(nscap->rootid);
kroot = make_kuid(fs_ns, root);
/* If the root kuid maps to a valid uid in current ns, then return
* this as a nscap. */
mappedroot = from_kuid(current_user_ns(), kroot);
if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
size = sizeof(struct vfs_ns_cap_data);
if (alloc) {
*buffer = tmpbuf;
if (!nscap) {
/* v2 -> v3 conversion */
nscap = kzalloc(size, GFP_ATOMIC);
if (!nscap) {
size = -ENOMEM;
goto out_free;
}
nsmagic = VFS_CAP_REVISION_3;
magic = le32_to_cpu(cap->magic_etc);
if (magic & VFS_CAP_FLAGS_EFFECTIVE)
nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
nscap->magic_etc = cpu_to_le32(nsmagic);
} else {
/* use allocated v3 buffer */
tmpbuf = NULL;
}
nscap->rootid = cpu_to_le32(mappedroot);
} else
kfree(tmpbuf);
return size;
*buffer = nscap;
}
goto out_free;
}
if (!rootid_owns_currentns(kroot)) {
kfree(tmpbuf);
return -EOPNOTSUPP;
size = -EOVERFLOW;
goto out_free;
}
/* This comes from a parent namespace. Return as a v2 capability */
size = sizeof(struct vfs_cap_data);
if (alloc) {
*buffer = kmalloc(size, GFP_ATOMIC);
if (*buffer) {
struct vfs_cap_data *cap = *buffer;
__le32 nsmagic, magic;
if (nscap) {
/* v3 -> v2 conversion */
cap = kzalloc(size, GFP_ATOMIC);
if (!cap) {
size = -ENOMEM;
goto out_free;
}
magic = VFS_CAP_REVISION_2;
nsmagic = le32_to_cpu(nscap->magic_etc);
if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
@ -443,9 +459,12 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
cap->magic_etc = cpu_to_le32(magic);
} else {
size = -ENOMEM;
/* use unconverted v2 */
tmpbuf = NULL;
}
*buffer = cap;
}
out_free:
kfree(tmpbuf);
return size;
}