vfs-6.8.rw

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZZUzXQAKCRCRxhvAZXjc
 ogOtAQDpqUp1zY4dV/dZisCJ5xarZTsSZ1AvgmcxZBtS0NhbdgEAshWvYGA9ryS/
 ChL5jjtjjZDLhRA//reoFHTQIrdp2w8=
 =bF+R
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.8.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs rw updates from Christian Brauner:
 "This contains updates from Amir for read-write backing file helpers
  for stacking filesystems such as overlayfs:

   - Fanotify is currently in the process of introducing pre content
     events. Roughly, a new permission event will be added indicating
     that it is safe to write to the file being accessed. These events
     are used by hierarchical storage managers to e.g., fill the content
     of files on first access.

     During that work we noticed that our current permission checking is
     inconsistent in rw_verify_area() and remap_verify_area().
     Especially in the splice code permission checking is done multiple
     times. For example, one time for the whole range and then again for
     partial ranges inside the iterator.

     In addition, we mostly do permission checking before we call
     file_start_write() except for a few places where we call it after.
     For pre-content events we need such permission checking to be done
     before file_start_write(). So this is a nice reason to clean this
     all up.

     After this series, all permission checking is done before
     file_start_write().

     As part of this cleanup we also massaged the splice code a bit. We
     got rid of a few helpers because we are alredy drowning in special
     read-write helpers. We also cleaned up the return types for splice
     helpers.

   - Introduce generic read-write helpers for backing files. This lifts
     some overlayfs code to common code so it can be used by the FUSE
     passthrough work coming in over the next cycles. Make Amir and
     Miklos the maintainers for this new subsystem of the vfs"

* tag 'vfs-6.8.rw' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (30 commits)
  fs: fix __sb_write_started() kerneldoc formatting
  fs: factor out backing_file_mmap() helper
  fs: factor out backing_file_splice_{read,write}() helpers
  fs: factor out backing_file_{read,write}_iter() helpers
  fs: prepare for stackable filesystems backing file helpers
  fsnotify: optionally pass access range in file permission hooks
  fsnotify: assert that file_start_write() is not held in permission hooks
  fsnotify: split fsnotify_perm() into two hooks
  fs: use splice_copy_file_range() inline helper
  splice: return type ssize_t from all helpers
  fs: use do_splice_direct() for nfsd/ksmbd server-side-copy
  fs: move file_start_write() into direct_splice_actor()
  fs: fork splice_file_range() from do_splice_direct()
  fs: create {sb,file}_write_not_started() helpers
  fs: create file_write_started() helper
  fs: create __sb_write_started() helper
  fs: move kiocb_start_write() into vfs_iocb_iter_write()
  fs: move permission hook out of do_iter_read()
  fs: move permission hook out of do_iter_write()
  fs: move file_start_write() into vfs_iter_write()
  ...
This commit is contained in:
Linus Torvalds 2024-01-08 11:11:51 -08:00
commit bb93c5ed45
30 changed files with 956 additions and 582 deletions

View File

@ -8143,6 +8143,15 @@ S: Supported
F: fs/iomap/
F: include/linux/iomap.h
FILESYSTEMS [STACKABLE]
M: Miklos Szeredi <miklos@szeredi.hu>
M: Amir Goldstein <amir73il@gmail.com>
L: linux-fsdevel@vger.kernel.org
L: linux-unionfs@vger.kernel.org
S: Maintained
F: fs/backing-file.c
F: include/linux/backing-file.h
FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
M: Riku Voipio <riku.voipio@iki.fi>
L: linux-hwmon@vger.kernel.org

View File

@ -245,9 +245,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len);
file_start_write(file);
bw = vfs_iter_write(file, &i, ppos, 0);
file_end_write(file);
if (likely(bw == bvec->bv_len))
return 0;

View File

@ -18,6 +18,10 @@ config VALIDATE_FS_PARSER
config FS_IOMAP
bool
# Stackable filesystems
config FS_STACK
bool
config BUFFER_HEAD
bool

View File

@ -39,6 +39,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o
obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o
obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o
obj-$(CONFIG_FS_STACK) += backing-file.o
obj-$(CONFIG_FS_MBCACHE) += mbcache.o
obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/

336
fs/backing-file.c Normal file
View File

@ -0,0 +1,336 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Common helpers for stackable filesystems and backing files.
*
* Forked from fs/overlayfs/file.c.
*
* Copyright (C) 2017 Red Hat, Inc.
* Copyright (C) 2023 CTERA Networks.
*/
#include <linux/fs.h>
#include <linux/backing-file.h>
#include <linux/splice.h>
#include <linux/mm.h>
#include "internal.h"
/**
* backing_file_open - open a backing file for kernel internal use
* @user_path: path that the user reuqested to open
* @flags: open flags
* @real_path: path of the backing file
* @cred: credentials for open
*
* Open a backing file for a stackable filesystem (e.g., overlayfs).
* @user_path may be on the stackable filesystem and @real_path on the
* underlying filesystem. In this case, we want to be able to return the
* @user_path of the stackable filesystem. This is done by embedding the
* returned file into a container structure that also stores the stacked
* file's path, which can be retrieved using backing_file_user_path().
*/
struct file *backing_file_open(const struct path *user_path, int flags,
const struct path *real_path,
const struct cred *cred)
{
struct file *f;
int error;
f = alloc_empty_backing_file(flags, cred);
if (IS_ERR(f))
return f;
path_get(user_path);
*backing_file_user_path(f) = *user_path;
error = vfs_open(real_path, f);
if (error) {
fput(f);
f = ERR_PTR(error);
}
return f;
}
EXPORT_SYMBOL_GPL(backing_file_open);
struct backing_aio {
struct kiocb iocb;
refcount_t ref;
struct kiocb *orig_iocb;
/* used for aio completion */
void (*end_write)(struct file *);
struct work_struct work;
long res;
};
static struct kmem_cache *backing_aio_cachep;
#define BACKING_IOCB_MASK \
(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
static rwf_t iocb_to_rw_flags(int flags)
{
return (__force rwf_t)(flags & BACKING_IOCB_MASK);
}
static void backing_aio_put(struct backing_aio *aio)
{
if (refcount_dec_and_test(&aio->ref)) {
fput(aio->iocb.ki_filp);
kmem_cache_free(backing_aio_cachep, aio);
}
}
static void backing_aio_cleanup(struct backing_aio *aio, long res)
{
struct kiocb *iocb = &aio->iocb;
struct kiocb *orig_iocb = aio->orig_iocb;
if (aio->end_write)
aio->end_write(orig_iocb->ki_filp);
orig_iocb->ki_pos = iocb->ki_pos;
backing_aio_put(aio);
}
static void backing_aio_rw_complete(struct kiocb *iocb, long res)
{
struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
struct kiocb *orig_iocb = aio->orig_iocb;
if (iocb->ki_flags & IOCB_WRITE)
kiocb_end_write(iocb);
backing_aio_cleanup(aio, res);
orig_iocb->ki_complete(orig_iocb, res);
}
static void backing_aio_complete_work(struct work_struct *work)
{
struct backing_aio *aio = container_of(work, struct backing_aio, work);
backing_aio_rw_complete(&aio->iocb, aio->res);
}
static void backing_aio_queue_completion(struct kiocb *iocb, long res)
{
struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
/*
* Punt to a work queue to serialize updates of mtime/size.
*/
aio->res = res;
INIT_WORK(&aio->work, backing_aio_complete_work);
queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
&aio->work);
}
static int backing_aio_init_wq(struct kiocb *iocb)
{
struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
if (sb->s_dio_done_wq)
return 0;
return sb_init_dio_done_wq(sb);
}
ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
struct kiocb *iocb, int flags,
struct backing_file_ctx *ctx)
{
struct backing_aio *aio = NULL;
const struct cred *old_cred;
ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
return -EIO;
if (!iov_iter_count(iter))
return 0;
if (iocb->ki_flags & IOCB_DIRECT &&
!(file->f_mode & FMODE_CAN_ODIRECT))
return -EINVAL;
old_cred = override_creds(ctx->cred);
if (is_sync_kiocb(iocb)) {
rwf_t rwf = iocb_to_rw_flags(flags);
ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
} else {
ret = -ENOMEM;
aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
if (!aio)
goto out;
aio->orig_iocb = iocb;
kiocb_clone(&aio->iocb, iocb, get_file(file));
aio->iocb.ki_complete = backing_aio_rw_complete;
refcount_set(&aio->ref, 2);
ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
backing_aio_put(aio);
if (ret != -EIOCBQUEUED)
backing_aio_cleanup(aio, ret);
}
out:
revert_creds(old_cred);
if (ctx->accessed)
ctx->accessed(ctx->user_file);
return ret;
}
EXPORT_SYMBOL_GPL(backing_file_read_iter);
ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
struct kiocb *iocb, int flags,
struct backing_file_ctx *ctx)
{
const struct cred *old_cred;
ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
return -EIO;
if (!iov_iter_count(iter))
return 0;
ret = file_remove_privs(ctx->user_file);
if (ret)
return ret;
if (iocb->ki_flags & IOCB_DIRECT &&
!(file->f_mode & FMODE_CAN_ODIRECT))
return -EINVAL;
/*
* Stacked filesystems don't support deferred completions, don't copy
* this property in case it is set by the issuer.
*/
flags &= ~IOCB_DIO_CALLER_COMP;
old_cred = override_creds(ctx->cred);
if (is_sync_kiocb(iocb)) {
rwf_t rwf = iocb_to_rw_flags(flags);
ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
if (ctx->end_write)
ctx->end_write(ctx->user_file);
} else {
struct backing_aio *aio;
ret = backing_aio_init_wq(iocb);
if (ret)
goto out;
ret = -ENOMEM;
aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
if (!aio)
goto out;
aio->orig_iocb = iocb;
aio->end_write = ctx->end_write;
kiocb_clone(&aio->iocb, iocb, get_file(file));
aio->iocb.ki_flags = flags;
aio->iocb.ki_complete = backing_aio_queue_completion;
refcount_set(&aio->ref, 2);
ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
backing_aio_put(aio);
if (ret != -EIOCBQUEUED)
backing_aio_cleanup(aio, ret);
}
out:
revert_creds(old_cred);
return ret;
}
EXPORT_SYMBOL_GPL(backing_file_write_iter);
ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags,
struct backing_file_ctx *ctx)
{
const struct cred *old_cred;
ssize_t ret;
if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
return -EIO;
old_cred = override_creds(ctx->cred);
ret = vfs_splice_read(in, ppos, pipe, len, flags);
revert_creds(old_cred);
if (ctx->accessed)
ctx->accessed(ctx->user_file);
return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_read);
ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos, size_t len,
unsigned int flags,
struct backing_file_ctx *ctx)
{
const struct cred *old_cred;
ssize_t ret;
if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
return -EIO;
ret = file_remove_privs(ctx->user_file);
if (ret)
return ret;
old_cred = override_creds(ctx->cred);
file_start_write(out);
ret = iter_file_splice_write(pipe, out, ppos, len, flags);
file_end_write(out);
revert_creds(old_cred);
if (ctx->end_write)
ctx->end_write(ctx->user_file);
return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_write);
int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
struct backing_file_ctx *ctx)
{
const struct cred *old_cred;
int ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) ||
WARN_ON_ONCE(ctx->user_file != vma->vm_file))
return -EIO;
if (!file->f_op->mmap)
return -ENODEV;
vma_set_file(vma, file);
old_cred = override_creds(ctx->cred);
ret = call_mmap(vma->vm_file, vma);
revert_creds(old_cred);
if (ctx->accessed)
ctx->accessed(ctx->user_file);
return ret;
}
EXPORT_SYMBOL_GPL(backing_file_mmap);
static int __init backing_aio_init(void)
{
backing_aio_cachep = kmem_cache_create("backing_aio",
sizeof(struct backing_aio),
0, SLAB_HWCACHE_ALIGN, NULL);
if (!backing_aio_cachep)
return -ENOMEM;
return 0;
}
fs_initcall(backing_aio_init);

View File

@ -4533,29 +4533,29 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
if (ret < 0)
goto out_acct;
file_start_write(file);
if (iov_iter_count(&iter) == 0) {
ret = 0;
goto out_end_write;
goto out_iov;
}
pos = args.offset;
ret = rw_verify_area(WRITE, file, &pos, args.len);
if (ret < 0)
goto out_end_write;
goto out_iov;
init_sync_kiocb(&kiocb, file);
ret = kiocb_set_rw_flags(&kiocb, 0);
if (ret)
goto out_end_write;
goto out_iov;
kiocb.ki_pos = pos;
file_start_write(file);
ret = btrfs_do_write_iter(&kiocb, &iter, &args);
if (ret > 0)
fsnotify_modify(file);
out_end_write:
file_end_write(file);
out_iov:
kfree(iov);
out_acct:
if (ret > 0)

View File

@ -259,7 +259,8 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
_enter("%ld", ret);
kiocb_end_write(iocb);
if (ki->was_async)
kiocb_end_write(iocb);
if (ret < 0)
trace_cachefiles_io_error(object, inode, ret,
@ -319,8 +320,6 @@ int __cachefiles_write(struct cachefiles_object *object,
ki->iocb.ki_complete = cachefiles_write_complete;
atomic_long_add(ki->b_writing, &cache->b_writing);
kiocb_start_write(&ki->iocb);
get_file(ki->iocb.ki_filp);
cachefiles_grab_object(object, cachefiles_obj_get_ioreq);

View File

@ -12,6 +12,7 @@
#include <linux/falloc.h>
#include <linux/iversion.h>
#include <linux/ktime.h>
#include <linux/splice.h>
#include "super.h"
#include "mds_client.h"
@ -3010,8 +3011,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
* {read,write}_iter, which will get caps again.
*/
put_rd_wr_caps(src_ci, src_got, dst_ci, dst_got);
ret = do_splice_direct(src_file, &src_off, dst_file,
&dst_off, src_objlen, flags);
ret = splice_file_range(src_file, &src_off, dst_file, &dst_off,
src_objlen);
/* Abort on short copies or on error */
if (ret < (long)src_objlen) {
doutc(cl, "Failed partial copy (%zd)\n", ret);
@ -3065,8 +3066,8 @@ out_caps:
*/
if (len && (len < src_ci->i_layout.object_size)) {
doutc(cl, "Final partial copy of %zu bytes\n", len);
bytes = do_splice_direct(src_file, &src_off, dst_file,
&dst_off, len, flags);
bytes = splice_file_range(src_file, &src_off, dst_file,
&dst_off, len);
if (bytes > 0)
ret += bytes;
else
@ -3089,8 +3090,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
len, flags);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
ret = generic_copy_file_range(src_file, src_off, dst_file,
dst_off, len, flags);
ret = splice_copy_file_range(src_file, src_off, dst_file,
dst_off, len);
return ret;
}

View File

@ -79,14 +79,12 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
if (ret)
goto finish_write;
file_start_write(host_file);
inode_lock(coda_inode);
ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0);
coda_inode->i_size = file_inode(host_file)->i_size;
coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
inode_set_mtime_to_ts(coda_inode, inode_set_ctime_current(coda_inode));
inode_unlock(coda_inode);
file_end_write(host_file);
finish_write:
venus_access_intent(coda_inode->i_sb, coda_i2f(coda_inode),

View File

@ -19,6 +19,7 @@
#include <linux/uio.h>
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/splice.h>
static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
unsigned int open_flags, int opcode,
@ -3195,8 +3196,8 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
len, flags);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
ret = generic_copy_file_range(src_file, src_off, dst_file,
dst_off, len, flags);
ret = splice_copy_file_range(src_file, src_off, dst_file,
dst_off, len);
return ret;
}

View File

@ -244,10 +244,10 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags,
/*
* fs/splice.c:
*/
long splice_file_to_pipe(struct file *in,
struct pipe_inode_info *opipe,
loff_t *offset,
size_t len, unsigned int flags);
ssize_t splice_file_to_pipe(struct file *in,
struct pipe_inode_info *opipe,
loff_t *offset,
size_t len, unsigned int flags);
/*
* fs/xattr.c:

View File

@ -10,6 +10,7 @@
#include <linux/mount.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_ssc.h>
#include <linux/splice.h>
#include "delegation.h"
#include "internal.h"
#include "iostat.h"
@ -195,8 +196,8 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
ret = __nfs4_copy_file_range(file_in, pos_in, file_out, pos_out, count,
flags);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
ret = generic_copy_file_range(file_in, pos_in, file_out,
pos_out, count, flags);
ret = splice_copy_file_range(file_in, pos_in, file_out,
pos_out, count);
return ret;
}

View File

@ -1039,7 +1039,10 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
ssize_t host_err;
trace_nfsd_read_splice(rqstp, fhp, offset, *count);
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
host_err = rw_verify_area(READ, file, &offset, *count);
if (!host_err)
host_err = splice_direct_to_actor(file, &sd,
nfsd_direct_splice_actor);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
@ -1176,9 +1179,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
since = READ_ONCE(file->f_wb_err);
if (verf)
nfsd_copy_write_verifier(verf, nn);
file_start_write(file);
host_err = vfs_iter_write(file, &iter, &pos, flags);
file_end_write(file);
if (host_err < 0) {
commit_reset_write_verifier(nn, rqstp, host_err);
goto out_nfserr;

View File

@ -304,6 +304,10 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (ret)
return ret;
ret = fsnotify_file_area_perm(file, MAY_WRITE, &offset, len);
if (ret)
return ret;
if (S_ISFIFO(inode->i_mode))
return -ESPIPE;
@ -1178,44 +1182,6 @@ struct file *kernel_file_open(const struct path *path, int flags,
}
EXPORT_SYMBOL_GPL(kernel_file_open);
/**
* backing_file_open - open a backing file for kernel internal use
* @user_path: path that the user reuqested to open
* @flags: open flags
* @real_path: path of the backing file
* @cred: credentials for open
*
* Open a backing file for a stackable filesystem (e.g., overlayfs).
* @user_path may be on the stackable filesystem and @real_path on the
* underlying filesystem. In this case, we want to be able to return the
* @user_path of the stackable filesystem. This is done by embedding the
* returned file into a container structure that also stores the stacked
* file's path, which can be retrieved using backing_file_user_path().
*/
struct file *backing_file_open(const struct path *user_path, int flags,
const struct path *real_path,
const struct cred *cred)
{
struct file *f;
int error;
f = alloc_empty_backing_file(flags, cred);
if (IS_ERR(f))
return f;
path_get(user_path);
*backing_file_user_path(f) = *user_path;
f->f_path = *real_path;
error = do_dentry_open(f, d_inode(real_path->dentry), NULL);
if (error) {
fput(f);
f = ERR_PTR(error);
}
return f;
}
EXPORT_SYMBOL_GPL(backing_file_open);
#define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE))
#define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config OVERLAY_FS
tristate "Overlay filesystem support"
select FS_STACK
select EXPORTFS
help
An overlay filesystem combines two filesystems - an 'upper' filesystem

View File

@ -230,6 +230,19 @@ static int ovl_copy_fileattr(struct inode *inode, const struct path *old,
return ovl_real_fileattr_set(new, &newfa);
}
static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
{
loff_t tmp;
if (WARN_ON_ONCE(pos != pos2))
return -EIO;
if (WARN_ON_ONCE(pos < 0 || len < 0 || totlen < 0))
return -EIO;
if (WARN_ON_ONCE(check_add_overflow(pos, len, &tmp)))
return -EIO;
return 0;
}
static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
struct file *new_file, loff_t len)
{
@ -244,13 +257,20 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
int error = 0;
ovl_path_lowerdata(dentry, &datapath);
if (WARN_ON(datapath.dentry == NULL))
if (WARN_ON_ONCE(datapath.dentry == NULL) ||
WARN_ON_ONCE(len < 0))
return -EIO;
old_file = ovl_path_open(&datapath, O_LARGEFILE | O_RDONLY);
if (IS_ERR(old_file))
return PTR_ERR(old_file);
error = rw_verify_area(READ, old_file, &old_pos, len);
if (!error)
error = rw_verify_area(WRITE, new_file, &new_pos, len);
if (error)
goto out_fput;
/* Try to use clone_file_range to clone up within the same fs */
ovl_start_write(dentry);
cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
@ -265,7 +285,7 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
while (len) {
size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
long bytes;
ssize_t bytes;
if (len < this_len)
this_len = len;
@ -309,11 +329,13 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
}
}
ovl_start_write(dentry);
error = ovl_verify_area(old_pos, new_pos, this_len, len);
if (error)
break;
bytes = do_splice_direct(old_file, &old_pos,
new_file, &new_pos,
this_len, SPLICE_F_MOVE);
ovl_end_write(dentry);
if (bytes <= 0) {
error = bytes;
break;

View File

@ -9,25 +9,11 @@
#include <linux/xattr.h>
#include <linux/uio.h>
#include <linux/uaccess.h>
#include <linux/splice.h>
#include <linux/security.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/backing-file.h>
#include "overlayfs.h"
#include "../internal.h" /* for sb_init_dio_done_wq */
struct ovl_aio_req {
struct kiocb iocb;
refcount_t ref;
struct kiocb *orig_iocb;
/* used for aio completion */
struct work_struct work;
long res;
};
static struct kmem_cache *ovl_aio_request_cachep;
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
if (realinode != ovl_inode_upper(inode))
@ -274,83 +260,16 @@ static void ovl_file_accessed(struct file *file)
touch_atime(&file->f_path);
}
#define OVL_IOCB_MASK \
(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
static rwf_t iocb_to_rw_flags(int flags)
{
return (__force rwf_t)(flags & OVL_IOCB_MASK);
}
static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
{
if (refcount_dec_and_test(&aio_req->ref)) {
fput(aio_req->iocb.ki_filp);
kmem_cache_free(ovl_aio_request_cachep, aio_req);
}
}
static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
{
struct kiocb *iocb = &aio_req->iocb;
struct kiocb *orig_iocb = aio_req->orig_iocb;
if (iocb->ki_flags & IOCB_WRITE) {
kiocb_end_write(iocb);
ovl_file_modified(orig_iocb->ki_filp);
}
orig_iocb->ki_pos = iocb->ki_pos;
ovl_aio_put(aio_req);
}
static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
{
struct ovl_aio_req *aio_req = container_of(iocb,
struct ovl_aio_req, iocb);
struct kiocb *orig_iocb = aio_req->orig_iocb;
ovl_aio_cleanup_handler(aio_req);
orig_iocb->ki_complete(orig_iocb, res);
}
static void ovl_aio_complete_work(struct work_struct *work)
{
struct ovl_aio_req *aio_req = container_of(work,
struct ovl_aio_req, work);
ovl_aio_rw_complete(&aio_req->iocb, aio_req->res);
}
static void ovl_aio_queue_completion(struct kiocb *iocb, long res)
{
struct ovl_aio_req *aio_req = container_of(iocb,
struct ovl_aio_req, iocb);
struct kiocb *orig_iocb = aio_req->orig_iocb;
/*
* Punt to a work queue to serialize updates of mtime/size.
*/
aio_req->res = res;
INIT_WORK(&aio_req->work, ovl_aio_complete_work);
queue_work(file_inode(orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
&aio_req->work);
}
static int ovl_init_aio_done_wq(struct super_block *sb)
{
if (sb->s_dio_done_wq)
return 0;
return sb_init_dio_done_wq(sb);
}
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
struct fd real;
const struct cred *old_cred;
ssize_t ret;
struct backing_file_ctx ctx = {
.cred = ovl_creds(file_inode(file)->i_sb),
.user_file = file,
.accessed = ovl_file_accessed,
};
if (!iov_iter_count(iter))
return 0;
@ -359,37 +278,8 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
if (ret)
return ret;
ret = -EINVAL;
if (iocb->ki_flags & IOCB_DIRECT &&
!(real.file->f_mode & FMODE_CAN_ODIRECT))
goto out_fdput;
old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
rwf_t rwf = iocb_to_rw_flags(iocb->ki_flags);
ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, rwf);
} else {
struct ovl_aio_req *aio_req;
ret = -ENOMEM;
aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
if (!aio_req)
goto out;
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
refcount_set(&aio_req->ref, 2);
ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
ovl_aio_put(aio_req);
if (ret != -EIOCBQUEUED)
ovl_aio_cleanup_handler(aio_req);
}
out:
revert_creds(old_cred);
ovl_file_accessed(file);
out_fdput:
ret = backing_file_read_iter(real.file, iter, iocb, iocb->ki_flags,
&ctx);
fdput(real);
return ret;
@ -400,9 +290,13 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct fd real;
const struct cred *old_cred;
ssize_t ret;
int ifl = iocb->ki_flags;
struct backing_file_ctx ctx = {
.cred = ovl_creds(inode->i_sb),
.user_file = file,
.end_write = ovl_file_modified,
};
if (!iov_iter_count(iter))
return 0;
@ -410,19 +304,11 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
inode_lock(inode);
/* Update mode */
ovl_copyattr(inode);
ret = file_remove_privs(file);
if (ret)
goto out_unlock;
ret = ovl_real_fdget(file, &real);
if (ret)
goto out_unlock;
ret = -EINVAL;
if (iocb->ki_flags & IOCB_DIRECT &&
!(real.file->f_mode & FMODE_CAN_ODIRECT))
goto out_fdput;
if (!ovl_should_sync(OVL_FS(inode->i_sb)))
ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
@ -431,42 +317,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
* this property in case it is set by the issuer.
*/
ifl &= ~IOCB_DIO_CALLER_COMP;
old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
rwf_t rwf = iocb_to_rw_flags(ifl);
file_start_write(real.file);
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf);
file_end_write(real.file);
/* Update size */
ovl_file_modified(file);
} else {
struct ovl_aio_req *aio_req;
ret = ovl_init_aio_done_wq(inode->i_sb);
if (ret)
goto out;
ret = -ENOMEM;
aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
if (!aio_req)
goto out;
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ovl_aio_queue_completion;
refcount_set(&aio_req->ref, 2);
kiocb_start_write(&aio_req->iocb);
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
ovl_aio_put(aio_req);
if (ret != -EIOCBQUEUED)
ovl_aio_cleanup_handler(aio_req);
}
out:
revert_creds(old_cred);
out_fdput:
ret = backing_file_write_iter(real.file, iter, iocb, ifl, &ctx);
fdput(real);
out_unlock:
@ -479,20 +330,21 @@ static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
const struct cred *old_cred;
struct fd real;
ssize_t ret;
struct backing_file_ctx ctx = {
.cred = ovl_creds(file_inode(in)->i_sb),
.user_file = in,
.accessed = ovl_file_accessed,
};
ret = ovl_real_fdget(in, &real);
if (ret)
return ret;
old_cred = ovl_override_creds(file_inode(in)->i_sb);
ret = vfs_splice_read(real.file, ppos, pipe, len, flags);
revert_creds(old_cred);
ovl_file_accessed(in);
ret = backing_file_splice_read(real.file, ppos, pipe, len, flags, &ctx);
fdput(real);
return ret;
}
@ -508,30 +360,23 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags)
{
struct fd real;
const struct cred *old_cred;
struct inode *inode = file_inode(out);
ssize_t ret;
struct backing_file_ctx ctx = {
.cred = ovl_creds(inode->i_sb),
.user_file = out,
.end_write = ovl_file_modified,
};
inode_lock(inode);
/* Update mode */
ovl_copyattr(inode);
ret = file_remove_privs(out);
if (ret)
goto out_unlock;
ret = ovl_real_fdget(out, &real);
if (ret)
goto out_unlock;
old_cred = ovl_override_creds(inode->i_sb);
file_start_write(real.file);
ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
file_end_write(real.file);
/* Update size */
ovl_file_modified(out);
revert_creds(old_cred);
ret = backing_file_splice_write(pipe, real.file, ppos, len, flags, &ctx);
fdput(real);
out_unlock:
@ -569,23 +414,13 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
{
struct file *realfile = file->private_data;
const struct cred *old_cred;
int ret;
struct backing_file_ctx ctx = {
.cred = ovl_creds(file_inode(file)->i_sb),
.user_file = file,
.accessed = ovl_file_accessed,
};
if (!realfile->f_op->mmap)
return -ENODEV;
if (WARN_ON(file != vma->vm_file))
return -EIO;
vma_set_file(vma, realfile);
old_cred = ovl_override_creds(file_inode(file)->i_sb);
ret = call_mmap(vma->vm_file, vma);
revert_creds(old_cred);
ovl_file_accessed(file);
return ret;
return backing_file_mmap(realfile, vma, &ctx);
}
static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
@ -778,19 +613,3 @@ const struct file_operations ovl_file_operations = {
.copy_file_range = ovl_copy_file_range,
.remap_file_range = ovl_remap_file_range,
};
int __init ovl_aio_request_cache_init(void)
{
ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
sizeof(struct ovl_aio_req),
0, SLAB_HWCACHE_ALIGN, NULL);
if (!ovl_aio_request_cachep)
return -ENOMEM;
return 0;
}
void ovl_aio_request_cache_destroy(void)
{
kmem_cache_destroy(ovl_aio_request_cachep);
}

View File

@ -425,6 +425,12 @@ int ovl_want_write(struct dentry *dentry);
void ovl_drop_write(struct dentry *dentry);
struct dentry *ovl_workdir(struct dentry *dentry);
const struct cred *ovl_override_creds(struct super_block *sb);
static inline const struct cred *ovl_creds(struct super_block *sb)
{
return OVL_FS(sb)->creator_cred;
}
int ovl_can_decode_fh(struct super_block *sb);
struct dentry *ovl_indexdir(struct super_block *sb);
bool ovl_index_all(struct super_block *sb);
@ -837,8 +843,6 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
/* file.c */
extern const struct file_operations ovl_file_operations;
int __init ovl_aio_request_cache_init(void);
void ovl_aio_request_cache_destroy(void);
int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa);
int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa);
int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa);

View File

@ -1501,14 +1501,10 @@ static int __init ovl_init(void)
if (ovl_inode_cachep == NULL)
return -ENOMEM;
err = ovl_aio_request_cache_init();
if (!err) {
err = register_filesystem(&ovl_fs_type);
if (!err)
return 0;
err = register_filesystem(&ovl_fs_type);
if (!err)
return 0;
ovl_aio_request_cache_destroy();
}
kmem_cache_destroy(ovl_inode_cachep);
return err;
@ -1524,7 +1520,6 @@ static void __exit ovl_exit(void)
*/
rcu_barrier();
kmem_cache_destroy(ovl_inode_cachep);
ovl_aio_request_cache_destroy();
}
module_init(ovl_init);

View File

@ -354,6 +354,9 @@ out_putf:
int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
{
int mask = read_write == READ ? MAY_READ : MAY_WRITE;
int ret;
if (unlikely((ssize_t) count < 0))
return -EINVAL;
@ -371,8 +374,11 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
}
}
return security_file_permission(file,
read_write == READ ? MAY_READ : MAY_WRITE);
ret = security_file_permission(file, mask);
if (ret)
return ret;
return fsnotify_file_area_perm(file, mask, ppos, count);
}
EXPORT_SYMBOL(rw_verify_area);
@ -773,34 +779,6 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
return ret;
}
static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
loff_t *pos, rwf_t flags)
{
size_t tot_len;
ssize_t ret = 0;
if (!(file->f_mode & FMODE_READ))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
tot_len = iov_iter_count(iter);
if (!tot_len)
goto out;
ret = rw_verify_area(READ, file, pos, tot_len);
if (ret < 0)
return ret;
if (file->f_op->read_iter)
ret = do_iter_readv_writev(file, iter, pos, READ, flags);
else
ret = do_loop_readv_writev(file, iter, pos, READ, flags);
out:
if (ret >= 0)
fsnotify_access(file);
return ret;
}
ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
struct iov_iter *iter)
{
@ -830,41 +808,37 @@ out:
EXPORT_SYMBOL(vfs_iocb_iter_read);
ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
rwf_t flags)
{
if (!file->f_op->read_iter)
return -EINVAL;
return do_iter_read(file, iter, ppos, flags);
}
EXPORT_SYMBOL(vfs_iter_read);
static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
loff_t *pos, rwf_t flags)
rwf_t flags)
{
size_t tot_len;
ssize_t ret = 0;
if (!(file->f_mode & FMODE_WRITE))
if (!file->f_op->read_iter)
return -EINVAL;
if (!(file->f_mode & FMODE_READ))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
tot_len = iov_iter_count(iter);
if (!tot_len)
return 0;
ret = rw_verify_area(WRITE, file, pos, tot_len);
goto out;
ret = rw_verify_area(READ, file, ppos, tot_len);
if (ret < 0)
return ret;
if (file->f_op->write_iter)
ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
else
ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
if (ret > 0)
fsnotify_modify(file);
ret = do_iter_readv_writev(file, iter, ppos, READ, flags);
out:
if (ret >= 0)
fsnotify_access(file);
return ret;
}
EXPORT_SYMBOL(vfs_iter_read);
/*
* Caller is responsible for calling kiocb_end_write() on completion
* if async iocb was queued.
*/
ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
struct iov_iter *iter)
{
@ -885,7 +859,10 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
if (ret < 0)
return ret;
kiocb_start_write(iocb);
ret = call_write_iter(file, iocb, iter);
if (ret != -EIOCBQUEUED)
kiocb_end_write(iocb);
if (ret > 0)
fsnotify_modify(file);
@ -894,46 +871,111 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
EXPORT_SYMBOL(vfs_iocb_iter_write);
ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
rwf_t flags)
rwf_t flags)
{
size_t tot_len;
ssize_t ret;
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
if (!file->f_op->write_iter)
return -EINVAL;
return do_iter_write(file, iter, ppos, flags);
tot_len = iov_iter_count(iter);
if (!tot_len)
return 0;
ret = rw_verify_area(WRITE, file, ppos, tot_len);
if (ret < 0)
return ret;
file_start_write(file);
ret = do_iter_readv_writev(file, iter, ppos, WRITE, flags);
if (ret > 0)
fsnotify_modify(file);
file_end_write(file);
return ret;
}
EXPORT_SYMBOL(vfs_iter_write);
static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
unsigned long vlen, loff_t *pos, rwf_t flags)
unsigned long vlen, loff_t *pos, rwf_t flags)
{
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;
size_t tot_len;
ssize_t ret = 0;
ret = import_iovec(ITER_DEST, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
if (ret >= 0) {
ret = do_iter_read(file, &iter, pos, flags);
kfree(iov);
}
if (!(file->f_mode & FMODE_READ))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
ret = import_iovec(ITER_DEST, vec, vlen, ARRAY_SIZE(iovstack), &iov,
&iter);
if (ret < 0)
return ret;
tot_len = iov_iter_count(&iter);
if (!tot_len)
goto out;
ret = rw_verify_area(READ, file, pos, tot_len);
if (ret < 0)
goto out;
if (file->f_op->read_iter)
ret = do_iter_readv_writev(file, &iter, pos, READ, flags);
else
ret = do_loop_readv_writev(file, &iter, pos, READ, flags);
out:
if (ret >= 0)
fsnotify_access(file);
kfree(iov);
return ret;
}
static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
unsigned long vlen, loff_t *pos, rwf_t flags)
unsigned long vlen, loff_t *pos, rwf_t flags)
{
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
ssize_t ret;
size_t tot_len;
ssize_t ret = 0;
ret = import_iovec(ITER_SOURCE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
if (ret >= 0) {
file_start_write(file);
ret = do_iter_write(file, &iter, pos, flags);
file_end_write(file);
kfree(iov);
}
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
ret = import_iovec(ITER_SOURCE, vec, vlen, ARRAY_SIZE(iovstack), &iov,
&iter);
if (ret < 0)
return ret;
tot_len = iov_iter_count(&iter);
if (!tot_len)
goto out;
ret = rw_verify_area(WRITE, file, pos, tot_len);
if (ret < 0)
goto out;
file_start_write(file);
if (file->f_op->write_iter)
ret = do_iter_readv_writev(file, &iter, pos, WRITE, flags);
else
ret = do_loop_readv_writev(file, &iter, pos, WRITE, flags);
if (ret > 0)
fsnotify_modify(file);
file_end_write(file);
out:
kfree(iov);
return ret;
}
@ -1178,7 +1220,7 @@ COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
#endif /* CONFIG_COMPAT */
static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
size_t count, loff_t max)
size_t count, loff_t max)
{
struct fd in, out;
struct inode *in_inode, *out_inode;
@ -1250,10 +1292,8 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
retval = rw_verify_area(WRITE, out.file, &out_pos, count);
if (retval < 0)
goto fput_out;
file_start_write(out.file);
retval = do_splice_direct(in.file, &pos, out.file, &out_pos,
count, fl);
file_end_write(out.file);
} else {
if (out.file->f_flags & O_NONBLOCK)
fl |= SPLICE_F_NONBLOCK;
@ -1362,38 +1402,6 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
}
#endif
/**
* generic_copy_file_range - copy data between two files
* @file_in: file structure to read from
* @pos_in: file offset to read from
* @file_out: file structure to write data to
* @pos_out: file offset to write data to
* @len: amount of data to copy
* @flags: copy flags
*
* This is a generic filesystem helper to copy data from one file to another.
* It has no constraints on the source or destination file owners - the files
* can belong to different superblocks and different filesystem types. Short
* copies are allowed.
*
* This should be called from the @file_out filesystem, as per the
* ->copy_file_range() method.
*
* Returns the number of bytes copied or a negative error indicating the
* failure.
*/
ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags)
{
lockdep_assert(sb_write_started(file_inode(file_out)->i_sb));
return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
}
EXPORT_SYMBOL(generic_copy_file_range);
/*
* Performs necessary checks before doing a file copy
*
@ -1478,6 +1486,7 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
{
ssize_t ret;
bool splice = flags & COPY_FILE_SPLICE;
bool samesb = file_inode(file_in)->i_sb == file_inode(file_out)->i_sb;
if (flags & ~COPY_FILE_SPLICE)
return -EINVAL;
@ -1509,19 +1518,24 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
ret = file_out->f_op->copy_file_range(file_in, pos_in,
file_out, pos_out,
len, flags);
goto done;
}
if (!splice && file_in->f_op->remap_file_range &&
file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
} else if (!splice && file_in->f_op->remap_file_range && samesb) {
ret = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out,
min_t(loff_t, MAX_RW_COUNT, len),
REMAP_FILE_CAN_SHORTEN);
if (ret > 0)
goto done;
/* fallback to splice */
if (ret <= 0)
splice = true;
} else if (samesb) {
/* Fallback to splice for same sb copy for backward compat */
splice = true;
}
file_end_write(file_out);
if (!splice)
goto done;
/*
* We can get here for same sb copy of filesystems that do not implement
* ->copy_file_range() in case filesystem does not support clone or in
@ -1533,11 +1547,16 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
* and which filesystems do not, that will allow userspace tools to
* make consistent desicions w.r.t using copy_file_range().
*
* We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE.
* We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE
* for server-side-copy between any two sb.
*
* In any case, we call do_splice_direct() and not splice_file_range(),
* without file_start_write() held, to avoid possible deadlocks related
* to splicing from input file, while file_start_write() is held on
* the output file on a different sb.
*/
ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
flags);
ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
min_t(size_t, len, MAX_RW_COUNT), 0);
done:
if (ret > 0) {
fsnotify_access(file_in);
@ -1549,8 +1568,6 @@ done:
inc_syscr(current);
inc_syscw(current);
file_end_write(file_out);
return ret;
}
EXPORT_SYMBOL(vfs_copy_file_range);

View File

@ -96,6 +96,10 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
if (res)
goto out;
res = fsnotify_file_perm(file, MAY_READ);
if (res)
goto out;
res = down_read_killable(&inode->i_rwsem);
if (res)
goto out;

View File

@ -102,7 +102,9 @@ static int generic_remap_checks(struct file *file_in, loff_t pos_in,
static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
bool write)
{
int mask = write ? MAY_WRITE : MAY_READ;
loff_t tmp;
int ret;
if (unlikely(pos < 0 || len < 0))
return -EINVAL;
@ -110,7 +112,11 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
if (unlikely(check_add_overflow(pos, len, &tmp)))
return -EINVAL;
return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
ret = security_file_permission(file, mask);
if (ret)
return ret;
return fsnotify_file_area_perm(file, mask, &pos, len);
}
/*
@ -385,14 +391,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
if (!file_in->f_op->remap_file_range)
return -EOPNOTSUPP;
ret = remap_verify_area(file_in, pos_in, len, false);
if (ret)
return ret;
ret = remap_verify_area(file_out, pos_out, len, true);
if (ret)
return ret;
ret = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out, len, remap_flags);
if (ret < 0)
@ -410,6 +408,14 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
{
loff_t ret;
ret = remap_verify_area(file_in, pos_in, len, false);
if (ret)
return ret;
ret = remap_verify_area(file_out, pos_out, len, true);
if (ret)
return ret;
file_start_write(file_out);
ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
remap_flags);
@ -420,7 +426,7 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
EXPORT_SYMBOL(vfs_clone_file_range);
/* Check whether we are allowed to dedupe the destination file */
static bool allow_file_dedupe(struct file *file)
static bool may_dedupe_file(struct file *file)
{
struct mnt_idmap *idmap = file_mnt_idmap(file);
struct inode *inode = file_inode(file);
@ -445,24 +451,29 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
REMAP_FILE_CAN_SHORTEN));
ret = mnt_want_write_file(dst_file);
if (ret)
return ret;
/*
* This is redundant if called from vfs_dedupe_file_range(), but other
* callers need it and it's not performance sesitive...
*/
ret = remap_verify_area(src_file, src_pos, len, false);
if (ret)
goto out_drop_write;
return ret;
ret = remap_verify_area(dst_file, dst_pos, len, true);
if (ret)
goto out_drop_write;
return ret;
/*
* This needs to be called after remap_verify_area() because of
* sb_start_write() and before may_dedupe_file() because the mount's
* MAY_WRITE need to be checked with mnt_get_write_access_file() held.
*/
ret = mnt_want_write_file(dst_file);
if (ret)
return ret;
ret = -EPERM;
if (!allow_file_dedupe(dst_file))
if (!may_dedupe_file(dst_file))
goto out_drop_write;
ret = -EXDEV;

View File

@ -25,6 +25,7 @@
#include <linux/freezer.h>
#include <linux/namei.h>
#include <linux/random.h>
#include <linux/splice.h>
#include <linux/uuid.h>
#include <linux/xattr.h>
#include <uapi/linux/magic.h>
@ -1506,8 +1507,8 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
free_xid(xid);
if (rc == -EOPNOTSUPP || rc == -EXDEV)
rc = generic_copy_file_range(src_file, off, dst_file,
destoff, len, flags);
rc = splice_copy_file_range(src_file, off, dst_file,
destoff, len);
return rc;
}

View File

@ -201,7 +201,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
unsigned int tail = pipe->tail;
unsigned int head = pipe->head;
unsigned int mask = pipe->ring_size - 1;
int ret = 0, page_nr = 0;
ssize_t ret = 0;
int page_nr = 0;
if (!spd_pages)
return 0;
@ -673,10 +674,13 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
.u.file = out,
};
int nbufs = pipe->max_usage;
struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
GFP_KERNEL);
struct bio_vec *array;
ssize_t ret;
if (!out->f_op->write_iter)
return -EINVAL;
array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL);
if (unlikely(!array))
return -ENOMEM;
@ -684,6 +688,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
splice_from_pipe_begin(&sd);
while (sd.total_len) {
struct kiocb kiocb;
struct iov_iter from;
unsigned int head, tail, mask;
size_t left;
@ -733,7 +738,10 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
}
iov_iter_bvec(&from, ITER_SOURCE, array, n, sd.total_len - left);
ret = vfs_iter_write(out, &from, &sd.pos, 0);
init_sync_kiocb(&kiocb, out);
kiocb.ki_pos = sd.pos;
ret = call_write_iter(out, &kiocb, &from);
sd.pos = kiocb.ki_pos;
if (ret <= 0)
break;
@ -925,8 +933,8 @@ static int warn_unsupported(struct file *file, const char *op)
/*
* Attempt to initiate a splice from pipe to file.
*/
static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags)
static ssize_t do_splice_from(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags)
{
if (unlikely(!out->f_op->splice_write))
return warn_unsupported(out, "write");
@ -944,6 +952,39 @@ static void do_splice_eof(struct splice_desc *sd)
sd->splice_eof(sd);
}
/*
* Callers already called rw_verify_area() on the entire range.
* No need to call it for sub ranges.
*/
static ssize_t do_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
unsigned int p_space;
if (unlikely(!(in->f_mode & FMODE_READ)))
return -EBADF;
if (!len)
return 0;
/* Don't try to read more the pipe has space for. */
p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail);
len = min_t(size_t, len, p_space << PAGE_SHIFT);
if (unlikely(len > MAX_RW_COUNT))
len = MAX_RW_COUNT;
if (unlikely(!in->f_op->splice_read))
return warn_unsupported(in, "read");
/*
* O_DIRECT and DAX don't deal with the pagecache, so we allocate a
* buffer, copy into it and splice that into the pipe.
*/
if ((in->f_flags & O_DIRECT) || IS_DAX(in->f_mapping->host))
return copy_splice_read(in, ppos, pipe, len, flags);
return in->f_op->splice_read(in, ppos, pipe, len, flags);
}
/**
* vfs_splice_read - Read data from a file and splice it into a pipe
* @in: File to splice from
@ -959,38 +1000,17 @@ static void do_splice_eof(struct splice_desc *sd)
* If successful, it returns the amount of data spliced, 0 if it hit the EOF or
* a hole and a negative error code otherwise.
*/
long vfs_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
ssize_t vfs_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
unsigned int p_space;
int ret;
if (unlikely(!(in->f_mode & FMODE_READ)))
return -EBADF;
if (!len)
return 0;
/* Don't try to read more the pipe has space for. */
p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail);
len = min_t(size_t, len, p_space << PAGE_SHIFT);
ssize_t ret;
ret = rw_verify_area(READ, in, ppos, len);
if (unlikely(ret < 0))
return ret;
if (unlikely(len > MAX_RW_COUNT))
len = MAX_RW_COUNT;
if (unlikely(!in->f_op->splice_read))
return warn_unsupported(in, "read");
/*
* O_DIRECT and DAX don't deal with the pagecache, so we allocate a
* buffer, copy into it and splice that into the pipe.
*/
if ((in->f_flags & O_DIRECT) || IS_DAX(in->f_mapping->host))
return copy_splice_read(in, ppos, pipe, len, flags);
return in->f_op->splice_read(in, ppos, pipe, len, flags);
return do_splice_read(in, ppos, pipe, len, flags);
}
EXPORT_SYMBOL_GPL(vfs_splice_read);
@ -1011,7 +1031,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
splice_direct_actor *actor)
{
struct pipe_inode_info *pipe;
long ret, bytes;
ssize_t ret, bytes;
size_t len;
int i, flags, more;
@ -1066,7 +1086,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
size_t read_len;
loff_t pos = sd->pos, prev_pos = pos;
ret = vfs_splice_read(in, &pos, pipe, len, flags);
ret = do_splice_read(in, &pos, pipe, len, flags);
if (unlikely(ret <= 0))
goto read_failure;
@ -1138,9 +1158,20 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
struct splice_desc *sd)
{
struct file *file = sd->u.file;
long ret;
return do_splice_from(pipe, file, sd->opos, sd->total_len,
sd->flags);
file_start_write(file);
ret = do_splice_from(pipe, file, sd->opos, sd->total_len, sd->flags);
file_end_write(file);
return ret;
}
static int splice_file_range_actor(struct pipe_inode_info *pipe,
struct splice_desc *sd)
{
struct file *file = sd->u.file;
return do_splice_from(pipe, file, sd->opos, sd->total_len, sd->flags);
}
static void direct_file_splice_eof(struct splice_desc *sd)
@ -1151,6 +1182,34 @@ static void direct_file_splice_eof(struct splice_desc *sd)
file->f_op->splice_eof(file);
}
static ssize_t do_splice_direct_actor(struct file *in, loff_t *ppos,
struct file *out, loff_t *opos,
size_t len, unsigned int flags,
splice_direct_actor *actor)
{
struct splice_desc sd = {
.len = len,
.total_len = len,
.flags = flags,
.pos = *ppos,
.u.file = out,
.splice_eof = direct_file_splice_eof,
.opos = opos,
};
ssize_t ret;
if (unlikely(!(out->f_mode & FMODE_WRITE)))
return -EBADF;
if (unlikely(out->f_flags & O_APPEND))
return -EINVAL;
ret = splice_direct_to_actor(in, &sd, actor);
if (ret > 0)
*ppos = sd.pos;
return ret;
}
/**
* do_splice_direct - splices data directly between two files
* @in: file to splice from
@ -1166,39 +1225,42 @@ static void direct_file_splice_eof(struct splice_desc *sd)
* (splice in + splice out, as compared to just sendfile()). So this helper
* can splice directly through a process-private pipe.
*
* Callers already called rw_verify_area() on the entire range.
*/
long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
loff_t *opos, size_t len, unsigned int flags)
ssize_t do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
loff_t *opos, size_t len, unsigned int flags)
{
struct splice_desc sd = {
.len = len,
.total_len = len,
.flags = flags,
.pos = *ppos,
.u.file = out,
.splice_eof = direct_file_splice_eof,
.opos = opos,
};
long ret;
if (unlikely(!(out->f_mode & FMODE_WRITE)))
return -EBADF;
if (unlikely(out->f_flags & O_APPEND))
return -EINVAL;
ret = rw_verify_area(WRITE, out, opos, len);
if (unlikely(ret < 0))
return ret;
ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
if (ret > 0)
*ppos = sd.pos;
return ret;
return do_splice_direct_actor(in, ppos, out, opos, len, flags,
direct_splice_actor);
}
EXPORT_SYMBOL(do_splice_direct);
/**
* splice_file_range - splices data between two files for copy_file_range()
* @in: file to splice from
* @ppos: input file offset
* @out: file to splice to
* @opos: output file offset
* @len: number of bytes to splice
*
* Description:
* For use by ->copy_file_range() methods.
* Like do_splice_direct(), but vfs_copy_file_range() already holds
* start_file_write() on @out file.
*
* Callers already called rw_verify_area() on the entire range.
*/
ssize_t splice_file_range(struct file *in, loff_t *ppos, struct file *out,
loff_t *opos, size_t len)
{
lockdep_assert(file_write_started(out));
return do_splice_direct_actor(in, ppos, out, opos,
min_t(size_t, len, MAX_RW_COUNT),
0, splice_file_range_actor);
}
EXPORT_SYMBOL(splice_file_range);
static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
{
for (;;) {
@ -1220,17 +1282,17 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
size_t len, unsigned int flags);
long splice_file_to_pipe(struct file *in,
struct pipe_inode_info *opipe,
loff_t *offset,
size_t len, unsigned int flags)
ssize_t splice_file_to_pipe(struct file *in,
struct pipe_inode_info *opipe,
loff_t *offset,
size_t len, unsigned int flags)
{
long ret;
ssize_t ret;
pipe_lock(opipe);
ret = wait_for_space(opipe, flags);
if (!ret)
ret = vfs_splice_read(in, offset, opipe, len, flags);
ret = do_splice_read(in, offset, opipe, len, flags);
pipe_unlock(opipe);
if (ret > 0)
wakeup_pipe_readers(opipe);
@ -1240,13 +1302,13 @@ long splice_file_to_pipe(struct file *in,
/*
* Determine where to splice to/from.
*/
long do_splice(struct file *in, loff_t *off_in, struct file *out,
loff_t *off_out, size_t len, unsigned int flags)
ssize_t do_splice(struct file *in, loff_t *off_in, struct file *out,
loff_t *off_out, size_t len, unsigned int flags)
{
struct pipe_inode_info *ipipe;
struct pipe_inode_info *opipe;
loff_t offset;
long ret;
ssize_t ret;
if (unlikely(!(in->f_mode & FMODE_READ) ||
!(out->f_mode & FMODE_WRITE)))
@ -1307,6 +1369,10 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
offset = in->f_pos;
}
ret = rw_verify_area(READ, in, &offset, len);
if (unlikely(ret < 0))
return ret;
if (out->f_flags & O_NONBLOCK)
flags |= SPLICE_F_NONBLOCK;
@ -1333,14 +1399,14 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
return ret;
}
static long __do_splice(struct file *in, loff_t __user *off_in,
struct file *out, loff_t __user *off_out,
size_t len, unsigned int flags)
static ssize_t __do_splice(struct file *in, loff_t __user *off_in,
struct file *out, loff_t __user *off_out,
size_t len, unsigned int flags)
{
struct pipe_inode_info *ipipe;
struct pipe_inode_info *opipe;
loff_t offset, *__off_in = NULL, *__off_out = NULL;
long ret;
ssize_t ret;
ipipe = get_pipe_info(in, true);
opipe = get_pipe_info(out, true);
@ -1379,16 +1445,16 @@ static long __do_splice(struct file *in, loff_t __user *off_in,
return ret;
}
static int iter_to_pipe(struct iov_iter *from,
struct pipe_inode_info *pipe,
unsigned flags)
static ssize_t iter_to_pipe(struct iov_iter *from,
struct pipe_inode_info *pipe,
unsigned int flags)
{
struct pipe_buffer buf = {
.ops = &user_page_pipe_buf_ops,
.flags = flags
};
size_t total = 0;
int ret = 0;
ssize_t ret = 0;
while (iov_iter_count(from)) {
struct page *pages[16];
@ -1437,8 +1503,8 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
* For lack of a better implementation, implement vmsplice() to userspace
* as a simple copy of the pipes pages to the user iov.
*/
static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
unsigned int flags)
static ssize_t vmsplice_to_user(struct file *file, struct iov_iter *iter,
unsigned int flags)
{
struct pipe_inode_info *pipe = get_pipe_info(file, true);
struct splice_desc sd = {
@ -1446,7 +1512,7 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
.flags = flags,
.u.data = iter
};
long ret = 0;
ssize_t ret = 0;
if (!pipe)
return -EBADF;
@ -1470,11 +1536,11 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
* as splice-from-memory, where the regular splice is splice-from-file (or
* to file). In both cases the output is a pipe, naturally.
*/
static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
unsigned int flags)
static ssize_t vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
unsigned int flags)
{
struct pipe_inode_info *pipe;
long ret = 0;
ssize_t ret = 0;
unsigned buf_flag = 0;
if (flags & SPLICE_F_GIFT)
@ -1570,7 +1636,7 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
size_t, len, unsigned int, flags)
{
struct fd in, out;
long error;
ssize_t error;
if (unlikely(!len))
return 0;
@ -1584,7 +1650,7 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
out = fdget(fd_out);
if (out.file) {
error = __do_splice(in.file, off_in, out.file, off_out,
len, flags);
len, flags);
fdput(out);
}
fdput(in);
@ -1807,15 +1873,15 @@ retry:
/*
* Link contents of ipipe to opipe.
*/
static int link_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
size_t len, unsigned int flags)
static ssize_t link_pipe(struct pipe_inode_info *ipipe,
struct pipe_inode_info *opipe,
size_t len, unsigned int flags)
{
struct pipe_buffer *ibuf, *obuf;
unsigned int i_head, o_head;
unsigned int i_tail, o_tail;
unsigned int i_mask, o_mask;
int ret = 0;
ssize_t ret = 0;
/*
* Potential ABBA deadlock, work around it by ordering lock
@ -1898,11 +1964,12 @@ static int link_pipe(struct pipe_inode_info *ipipe,
* The 'flags' used are the SPLICE_F_* variants, currently the only
* applicable one is SPLICE_F_NONBLOCK.
*/
long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags)
ssize_t do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags)
{
struct pipe_inode_info *ipipe = get_pipe_info(in, true);
struct pipe_inode_info *opipe = get_pipe_info(out, true);
int ret = -EINVAL;
ssize_t ret = -EINVAL;
if (unlikely(!(in->f_mode & FMODE_READ) ||
!(out->f_mode & FMODE_WRITE)))
@ -1939,7 +2006,7 @@ long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags)
SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
{
struct fd in, out;
int error;
ssize_t error;
if (unlikely(flags & ~SPLICE_F_ALL))
return -EINVAL;

View File

@ -0,0 +1,42 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Common helpers for stackable filesystems and backing files.
*
* Copyright (C) 2023 CTERA Networks.
*/
#ifndef _LINUX_BACKING_FILE_H
#define _LINUX_BACKING_FILE_H
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/fs.h>
struct backing_file_ctx {
const struct cred *cred;
struct file *user_file;
void (*accessed)(struct file *);
void (*end_write)(struct file *);
};
struct file *backing_file_open(const struct path *user_path, int flags,
const struct path *real_path,
const struct cred *cred);
ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
struct kiocb *iocb, int flags,
struct backing_file_ctx *ctx);
ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
struct kiocb *iocb, int flags,
struct backing_file_ctx *ctx);
ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags,
struct backing_file_ctx *ctx);
ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos, size_t len,
unsigned int flags,
struct backing_file_ctx *ctx);
int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
struct backing_file_ctx *ctx);
#endif /* _LINUX_BACKING_FILE_H */

View File

@ -1648,9 +1648,70 @@ static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
#define __sb_writers_release(sb, lev) \
percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
/**
* __sb_write_started - check if sb freeze level is held
* @sb: the super we write to
* @level: the freeze level
*
* * > 0 - sb freeze level is held
* * 0 - sb freeze level is not held
* * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN
*/
static inline int __sb_write_started(const struct super_block *sb, int level)
{
return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1);
}
/**
* sb_write_started - check if SB_FREEZE_WRITE is held
* @sb: the super we write to
*
* May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
*/
static inline bool sb_write_started(const struct super_block *sb)
{
return lockdep_is_held_type(sb->s_writers.rw_sem + SB_FREEZE_WRITE - 1, 1);
return __sb_write_started(sb, SB_FREEZE_WRITE);
}
/**
* sb_write_not_started - check if SB_FREEZE_WRITE is not held
* @sb: the super we write to
*
* May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
*/
static inline bool sb_write_not_started(const struct super_block *sb)
{
return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0;
}
/**
* file_write_started - check if SB_FREEZE_WRITE is held
* @file: the file we write to
*
* May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
* May be false positive with !S_ISREG, because file_start_write() has
* no effect on !S_ISREG.
*/
static inline bool file_write_started(const struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
return true;
return sb_write_started(file_inode(file)->i_sb);
}
/**
* file_write_not_started - check if SB_FREEZE_WRITE is not held
* @file: the file we write to
*
* May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
* May be false positive with !S_ISREG, because file_start_write() has
* no effect on !S_ISREG.
*/
static inline bool file_write_not_started(const struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
return true;
return sb_write_not_started(file_inode(file)->i_sb);
}
/**
@ -2032,9 +2093,6 @@ extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int);
extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
size_t len, unsigned int flags);
int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t *len, unsigned int remap_flags,
@ -2535,9 +2593,6 @@ struct file *dentry_open(const struct path *path, int flags,
const struct cred *creds);
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
const struct cred *cred);
struct file *backing_file_open(const struct path *user_path, int flags,
const struct path *real_path,
const struct cred *cred);
struct path *backing_file_user_path(struct file *f);
/*
@ -3017,8 +3072,6 @@ ssize_t copy_splice_read(struct file *in, loff_t *ppos,
size_t len, unsigned int flags);
extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
struct file *, loff_t *, size_t, unsigned int);
extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
loff_t *opos, size_t len, unsigned int flags);
extern void

View File

@ -100,29 +100,49 @@ static inline int fsnotify_file(struct file *file, __u32 mask)
return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
}
/* Simple call site for access decisions */
static inline int fsnotify_perm(struct file *file, int mask)
/*
* fsnotify_file_area_perm - permission hook before access to file range
*/
static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
const loff_t *ppos, size_t count)
{
int ret;
__u32 fsnotify_mask = 0;
__u32 fsnotify_mask = FS_ACCESS_PERM;
if (!(mask & (MAY_READ | MAY_OPEN)))
/*
* filesystem may be modified in the context of permission events
* (e.g. by HSM filling a file on access), so sb freeze protection
* must not be held.
*/
lockdep_assert_once(file_write_not_started(file));
if (!(perm_mask & MAY_READ))
return 0;
if (mask & MAY_OPEN) {
fsnotify_mask = FS_OPEN_PERM;
return fsnotify_file(file, fsnotify_mask);
}
if (file->f_flags & __FMODE_EXEC) {
ret = fsnotify_file(file, FS_OPEN_EXEC_PERM);
/*
* fsnotify_file_perm - permission hook before file access
*/
static inline int fsnotify_file_perm(struct file *file, int perm_mask)
{
return fsnotify_file_area_perm(file, perm_mask, NULL, 0);
}
if (ret)
return ret;
}
} else if (mask & MAY_READ) {
fsnotify_mask = FS_ACCESS_PERM;
/*
* fsnotify_open_perm - permission hook before file open
*/
static inline int fsnotify_open_perm(struct file *file)
{
int ret;
if (file->f_flags & __FMODE_EXEC) {
ret = fsnotify_file(file, FS_OPEN_EXEC_PERM);
if (ret)
return ret;
}
return fsnotify_file(file, fsnotify_mask);
return fsnotify_file(file, FS_OPEN_PERM);
}
/*

View File

@ -68,28 +68,37 @@ typedef int (splice_actor)(struct pipe_inode_info *, struct pipe_buffer *,
typedef int (splice_direct_actor)(struct pipe_inode_info *,
struct splice_desc *);
extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
loff_t *, size_t, unsigned int,
splice_actor *);
extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
struct splice_desc *, splice_actor *);
extern ssize_t splice_to_pipe(struct pipe_inode_info *,
struct splice_pipe_desc *);
extern ssize_t add_to_pipe(struct pipe_inode_info *,
struct pipe_buffer *);
long vfs_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
splice_direct_actor *);
extern long do_splice(struct file *in, loff_t *off_in,
struct file *out, loff_t *off_out,
size_t len, unsigned int flags);
ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags,
splice_actor *actor);
ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
struct splice_desc *sd, splice_actor *actor);
ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
struct splice_pipe_desc *spd);
ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf);
ssize_t vfs_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
ssize_t splice_direct_to_actor(struct file *file, struct splice_desc *sd,
splice_direct_actor *actor);
ssize_t do_splice(struct file *in, loff_t *off_in, struct file *out,
loff_t *off_out, size_t len, unsigned int flags);
ssize_t do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
loff_t *opos, size_t len, unsigned int flags);
ssize_t splice_file_range(struct file *in, loff_t *ppos, struct file *out,
loff_t *opos, size_t len);
extern long do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags);
extern ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags);
static inline long splice_copy_file_range(struct file *in, loff_t pos_in,
struct file *out, loff_t pos_out,
size_t len)
{
return splice_file_range(in, &pos_in, out, &pos_out, len);
}
ssize_t do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags);
ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags);
/*
* for dynamic pipe sizing

View File

@ -51,7 +51,7 @@ int io_tee(struct io_kiocb *req, unsigned int issue_flags)
struct file *out = sp->file_out;
unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
struct file *in;
long ret = 0;
ssize_t ret = 0;
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);
@ -92,7 +92,7 @@ int io_splice(struct io_kiocb *req, unsigned int issue_flags)
unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED;
loff_t *poff_in, *poff_out;
struct file *in;
long ret = 0;
ssize_t ret = 0;
WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

View File

@ -2580,13 +2580,7 @@ int security_kernfs_init_security(struct kernfs_node *kn_dir,
*/
int security_file_permission(struct file *file, int mask)
{
int ret;
ret = call_int_hook(file_permission, 0, file, mask);
if (ret)
return ret;
return fsnotify_perm(file, mask);
return call_int_hook(file_permission, 0, file, mask);
}
/**
@ -2837,7 +2831,7 @@ int security_file_open(struct file *file)
if (ret)
return ret;
return fsnotify_perm(file, MAY_OPEN);
return fsnotify_open_perm(file);
}
/**