linux-stable/fs
Andrei Vagin 58b7064773 fs: sendfile handles O_NONBLOCK of out_fd
commit bdeb77bc2c upstream.

sendfile has to return EAGAIN if out_fd is nonblocking and the write into
it would block.

Here is a small reproducer for the problem:

#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/sendfile.h>


/* Size of the sparse source file; far larger than a pipe can buffer. */
#define FILE_SIZE (1UL << 30)

/*
 * Reproducer for sendfile() into a non-blocking pipe.
 *
 * argv[1] is a directory in which an unnamed temporary file (O_TMPFILE)
 * is created and used as the sendfile() source.
 *
 * Prints "FAIL" on stderr when sendfile() does not behave as expected.
 * Returns 1 if the test could not be set up, 0 otherwise.
 */
int main(int argc, char **argv) {
        int p[2], fd;

        /* The directory argument is mandatory: it is handed to open(). */
        if (argc < 2) {
                fprintf(stderr, "usage: %s <directory>\n",
                        argc > 0 ? argv[0] : "reproducer");
                return 1;
        }

        /* Both pipe ends are non-blocking, so writes must never stall. */
        if (pipe2(p, O_NONBLOCK))
                return 1;

        fd = open(argv[1], O_RDWR | O_TMPFILE, 0666);
        if (fd < 0)
                return 1;

        /*
         * Without a successfully sized source file both sendfile() calls
         * would return 0 and the checks below would be meaningless.
         */
        if (ftruncate(fd, FILE_SIZE)) {
                perror("ftruncate");
                return 1;
        }

        /* First call: nothing has been written to the pipe yet, so it must not fail. */
        if (sendfile(p[1], fd, 0, FILE_SIZE) == -1) {
                fprintf(stderr, "FAIL\n");
        }
        /*
         * Second call: nobody reads from the pipe, so this write would
         * block; with O_NONBLOCK set on the pipe, sendfile() has to
         * return -1 with errno set to EAGAIN.
         */
        if (sendfile(p[1], fd, 0, FILE_SIZE) != -1 || errno != EAGAIN) {
                fprintf(stderr, "FAIL\n");
        }
        return 0;
}

It worked before b964bf53e5, it is stuck after b964bf53e5, and it
works again with this fix.

This regression occurred because do_splice_direct() calls pipe_write
that handles O_NONBLOCK.  Here is a trace log from the reproducer:

 1)               |  __x64_sys_sendfile64() {
 1)               |    do_sendfile() {
 1)               |      __fdget()
 1)               |      rw_verify_area()
 1)               |      __fdget()
 1)               |      rw_verify_area()
 1)               |      do_splice_direct() {
 1)               |        rw_verify_area()
 1)               |        splice_direct_to_actor() {
 1)               |          do_splice_to() {
 1)               |            rw_verify_area()
 1)               |            generic_file_splice_read()
 1) + 74.153 us   |          }
 1)               |          direct_splice_actor() {
 1)               |            iter_file_splice_write() {
 1)               |              __kmalloc()
 1)   0.148 us    |              pipe_lock();
 1)   0.153 us    |              splice_from_pipe_next.part.0();
 1)   0.162 us    |              page_cache_pipe_buf_confirm();
... 16 times
 1)   0.159 us    |              page_cache_pipe_buf_confirm();
 1)               |              vfs_iter_write() {
 1)               |                do_iter_write() {
 1)               |                  rw_verify_area()
 1)               |                  do_iter_readv_writev() {
 1)               |                    pipe_write() {
 1)               |                      mutex_lock()
 1)   0.153 us    |                      mutex_unlock();
 1)   1.368 us    |                    }
 1)   1.686 us    |                  }
 1)   5.798 us    |                }
 1)   6.084 us    |              }
 1)   0.174 us    |              kfree();
 1)   0.152 us    |              pipe_unlock();
 1) + 14.461 us   |            }
 1) + 14.783 us   |          }
 1)   0.164 us    |          page_cache_pipe_buf_release();
... 16 times
 1)   0.161 us    |          page_cache_pipe_buf_release();
 1)               |          touch_atime()
 1) + 95.854 us   |        }
 1) + 99.784 us   |      }
 1) ! 107.393 us  |    }
 1) ! 107.699 us  |  }

Link: https://lkml.kernel.org/r/20220415005015.525191-1-avagin@gmail.com
Fixes: b964bf53e5 ("teach sendfile(2) to handle send-to-pipe directly")
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2022-08-03 12:05:16 +02:00
..
9p 9p: fix EBADF errors in cached mode 2022-06-29 09:04:26 +02:00
adfs
affs
afs netfs: do not unlock and put the folio twice 2022-07-22 10:21:42 +02:00
autofs
befs
bfs
btrfs btrfs: zoned: fix a leaked bioc in read_zone_info 2022-07-22 10:21:21 +02:00
cachefiles
ceph netfs: do not unlock and put the folio twice 2022-07-22 10:21:42 +02:00
cifs smb3: workaround negprot bug in some Samba servers 2022-07-22 10:21:42 +02:00
coda
configfs
cramfs
crypto
debugfs
devpts
dlm dlm: fix pending remove if msg allocation fails 2022-07-29 17:28:15 +02:00
ecryptfs
efivarfs
efs
erofs erofs: fix 'backmost' member of z_erofs_decompress_frontend 2022-06-14 18:44:57 +02:00
exfat exfat: use updated exfat_chain directly during renaming 2022-07-29 17:28:16 +02:00
exportfs exportfs: support idmapped mounts 2022-06-09 10:30:56 +02:00
ext2
ext4 ext4: add reserved GDT blocks check 2022-06-22 14:28:11 +02:00
f2fs f2fs: do not count ENOENT for error case 2022-06-29 09:04:38 +02:00
fat fat: add ratelimit to fat*_ent_bread() 2022-06-09 10:29:49 +02:00
freevxfs
fscache fscache: Fix if condition in fscache_wait_on_volume_collision() 2022-07-12 16:42:17 +02:00
fuse
gfs2 gfs2: use i_lock spin_lock for inode qadata 2022-06-09 10:29:47 +02:00
hfs
hfsplus
hostfs
hpfs
hugetlbfs hugetlbfs: fix hugetlbfs_statfs() locking 2022-06-09 10:30:31 +02:00
iomap iomap: iomap_write_failed fix 2022-06-09 10:30:08 +02:00
isofs
jbd2
jffs2 jffs2: fix memory leak in jffs2_do_fill_super 2022-06-14 18:44:55 +02:00
jfs fs: jfs: fix possible NULL pointer dereference in dbFree() 2022-06-09 10:29:49 +02:00
kernfs kernfs: Separate kernfs_pr_cont_buf and rename_lock. 2022-06-14 18:45:11 +02:00
ksmbd ksmbd: use SOCK_NONBLOCK type for kernel_accept() 2022-07-22 10:21:46 +02:00
lockd lockd: fix nlm_close_files 2022-07-22 10:21:35 +02:00
minix
netfs netfs: do not unlock and put the folio twice 2022-07-22 10:21:42 +02:00
nfs NFSv4: Add an fattr allocation to _nfs4_discover_trunking() 2022-07-07 17:54:52 +02:00
nfs_common
nfsd NFSD: Decode NFSv4 birth time attribute 2022-07-22 10:21:35 +02:00
nilfs2 nilfs2: fix incorrect masking of permission flags for symlinks 2022-07-22 10:21:21 +02:00
nls
notify fanotify: refine the validation checks on non-dir inode mask 2022-07-07 17:54:57 +02:00
ntfs ntfs: fix use-after-free in ntfs_ucsncmp() 2022-08-03 12:05:16 +02:00
ntfs3 fs/ntfs3: Fix invalid free in log_replay 2022-06-09 10:30:56 +02:00
ocfs2 Revert "ocfs2: mount shared volume without ha stack" 2022-08-03 12:05:16 +02:00
omfs
openpromfs
orangefs
overlayfs
proc vmcore: convert copy_oldmem_page() to take an iov_iter 2022-06-29 09:04:36 +02:00
pstore
qnx4
qnx6
quota quota: Prevent memory allocation recursion while holding dq_lock 2022-06-22 14:27:51 +02:00
ramfs
reiserfs
romfs
smbfs_common
squashfs
sysfs
sysv
tracefs
ubifs
udf udf: Avoid using stale lengthOfImpUse 2022-05-10 13:30:32 +02:00
ufs
unicode
vboxsf
verity
xfs
zonefs zonefs: fix zonefs_iomap_begin() for reads 2022-06-25 15:29:46 +02:00
aio.c
anon_inodes.c
attr.c fs: account for group membership 2022-06-22 14:28:10 +02:00
bad_inode.c
binfmt_aout.c
binfmt_elf.c
binfmt_elf_fdpic.c
binfmt_elf_test.c
binfmt_flat.c binfmt_flat: do not stop relocating GOT entries prematurely on riscv 2022-06-09 10:29:26 +02:00
binfmt_misc.c
binfmt_script.c
buffer.c
char_dev.c
compat_binfmt_elf.c
coredump.c
d_path.c
dax.c dax: fix cache flush on PMD-mapped pages 2022-06-09 10:30:28 +02:00
dcache.c
direct-io.c
drop_caches.c
eventfd.c
eventpoll.c
exec.c fix race between exit_itimers() and /proc/pid/timers 2022-07-22 10:21:18 +02:00
fcntl.c
fhandle.c
file.c
file_table.c
filesystems.c
fs-writeback.c writeback: Fix inode->i_io_list not be protected by inode->i_lock error 2022-06-14 18:45:18 +02:00
fs_context.c
fs_parser.c
fs_pin.c
fs_struct.c
fs_types.c
fsopen.c
init.c
inode.c writeback: Fix inode->i_io_list not be protected by inode->i_lock error 2022-06-14 18:45:18 +02:00
internal.h
io-wq.c
io-wq.h
io_uring.c io_uring: fix provided buffer import 2022-07-12 16:42:11 +02:00
ioctl.c
Kconfig
Kconfig.binfmt
kernel_read_file.c
libfs.c
locks.c
Makefile
mbcache.c
mount.h
mpage.c
namei.c fs: add two trivial lookup helpers 2022-06-09 10:30:56 +02:00
namespace.c fs: hold writers when changing mount's idmapping 2022-06-09 10:29:42 +02:00
no-block.c
nsfs.c
open.c
pipe.c pipe: Fix missing lock in pipe_resize_ring() 2022-06-06 08:48:53 +02:00
pnode.c
pnode.h
posix_acl.c
proc_namespace.c
read_write.c fs: sendfile handles O_NONBLOCK of out_fd 2022-08-03 12:05:16 +02:00
readdir.c
remap_range.c fs/remap: constrain dedupe of EOF blocks 2022-07-22 10:21:21 +02:00
select.c
seq_file.c rxrpc: Fix locking issue 2022-06-09 10:30:19 +02:00
signalfd.c
splice.c
stack.c
stat.c
statfs.c
super.c
sync.c
sysctls.c
timerfd.c
userfaultfd.c userfaultfd: provide properly masked address for huge-pages 2022-08-03 12:05:16 +02:00
utimes.c
xattr.c