diff --git a/block/bdev.c b/block/bdev.c index e4cfb7adb645..750aec178b6a 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -425,6 +425,8 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) void bdev_add(struct block_device *bdev, dev_t dev) { + if (bdev_stable_writes(bdev)) + mapping_set_stable_writes(bdev->bd_inode->i_mapping); bdev->bd_dev = dev; bdev->bd_inode->i_rdev = dev; bdev->bd_inode->i_ino = dev; diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index a5083d447a62..1f5db6863663 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -309,9 +309,7 @@ static int autofs_fill_super(struct super_block *s, struct fs_context *fc) struct autofs_fs_context *ctx = fc->fs_private; struct autofs_sb_info *sbi = s->s_fs_info; struct inode *root_inode; - struct dentry *root; struct autofs_info *ino; - int ret = -ENOMEM; pr_debug("starting up, sbi = %p\n", sbi); @@ -328,56 +326,44 @@ static int autofs_fill_super(struct super_block *s, struct fs_context *fc) */ ino = autofs_new_ino(sbi); if (!ino) - goto fail; + return -ENOMEM; root_inode = autofs_get_inode(s, S_IFDIR | 0755); + if (!root_inode) + return -ENOMEM; + root_inode->i_uid = ctx->uid; root_inode->i_gid = ctx->gid; + root_inode->i_fop = &autofs_root_operations; + root_inode->i_op = &autofs_dir_inode_operations; - root = d_make_root(root_inode); - if (!root) - goto fail_ino; - - root->d_fsdata = ino; + s->s_root = d_make_root(root_inode); + if (unlikely(!s->s_root)) { + autofs_free_ino(ino); + return -ENOMEM; + } + s->s_root->d_fsdata = ino; if (ctx->pgrp_set) { sbi->oz_pgrp = find_get_pid(ctx->pgrp); - if (!sbi->oz_pgrp) { - ret = invalf(fc, "Could not find process group %d", - ctx->pgrp); - goto fail_dput; - } - } else { + if (!sbi->oz_pgrp) + return invalf(fc, "Could not find process group %d", + ctx->pgrp); + } else sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID); - } if (autofs_type_trigger(sbi->type)) - __managed_dentry_set_managed(root); - - root_inode->i_fop = &autofs_root_operations; - root_inode->i_op = &autofs_dir_inode_operations; + /* s->s_root won't be contended so there's little to + * be gained by not taking the d_lock when setting + * d_flags, even when a lot mounts are being done. + */ + managed_dentry_set_managed(s->s_root); pr_debug("pipe fd = %d, pgrp = %u\n", sbi->pipefd, pid_nr(sbi->oz_pgrp)); sbi->flags &= ~AUTOFS_SBI_CATATONIC; - - /* - * Success! Install the root dentry now to indicate completion. - */ - s->s_root = root; return 0; - - /* - * Failure ... clean up. - */ -fail_dput: - dput(root); - goto fail; -fail_ino: - autofs_free_ino(ino); -fail: - return ret; } /* diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a25dd3d20008..b0e8774c435a 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -998,6 +998,14 @@ static int ecryptfs_getattr_link(struct mnt_idmap *idmap, return rc; } +static int ecryptfs_do_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + if (flags & AT_GETATTR_NOSEC) + return vfs_getattr_nosec(path, stat, request_mask, flags); + return vfs_getattr(path, stat, request_mask, flags); +} + static int ecryptfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) @@ -1006,8 +1014,8 @@ static int ecryptfs_getattr(struct mnt_idmap *idmap, struct kstat lower_stat; int rc; - rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat, - request_mask, flags); + rc = ecryptfs_do_getattr(ecryptfs_dentry_to_lower_path(dentry), + &lower_stat, request_mask, flags); if (!rc) { fsstack_copy_attr_all(d_inode(dentry), ecryptfs_inode_to_lower(d_inode(dentry))); diff --git a/fs/inode.c b/fs/inode.c index edcd8a61975f..f238d987dec9 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -215,6 +215,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode) lockdep_set_class_and_name(&mapping->invalidate_lock, &sb->s_type->invalidate_lock_key, "mapping.invalidate_lock"); + if (sb->s_iflags & SB_I_STABLE_WRITES) + mapping_set_stable_writes(mapping); inode->i_private = NULL; inode->i_mapping = mapping; INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ diff --git a/fs/libfs.c b/fs/libfs.c index e9440d55073c..c2aa6fd4795c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -399,6 +399,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) return -EINVAL; } + /* In this case, ->private_data is protected by f_pos_lock */ + file->private_data = NULL; return vfs_setpos(file, offset, U32_MAX); } @@ -428,7 +430,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } -static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx) +static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) { struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode); XA_STATE(xas, &so_ctx->xa, ctx->pos); @@ -437,7 +439,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx) while (true) { dentry = offset_find_next(&xas); if (!dentry) - break; + return ERR_PTR(-ENOENT); if (!offset_dir_emit(ctx, dentry)) { dput(dentry); @@ -447,6 +449,7 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx) dput(dentry); ctx->pos = xas.xa_index + 1; } + return NULL; } /** @@ -479,7 +482,12 @@ static int offset_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit_dots(file, ctx)) return 0; - offset_iterate_dir(d_inode(dir), ctx); + /* In this case, ->private_data is protected by f_pos_lock */ + if (ctx->pos == 2) + file->private_data = NULL; + else if (file->private_data == ERR_PTR(-ENOENT)) + return 0; + file->private_data = offset_iterate_dir(d_inode(dir), ctx); return 0; } diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 345b8f161ca4..c63b31a460be 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -171,7 +171,7 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, type = ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat, request_mask, flags); + err = ovl_do_getattr(&realpath, stat, request_mask, flags); if (err) goto out; @@ -196,8 +196,8 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, (!is_dir ? STATX_NLINK : 0); ovl_path_lower(dentry, &realpath); - err = vfs_getattr(&realpath, &lowerstat, - lowermask, flags); + err = ovl_do_getattr(&realpath, &lowerstat, lowermask, + flags); if (err) goto out; @@ -249,8 +249,8 @@ int ovl_getattr(struct mnt_idmap *idmap, const struct path *path, ovl_path_lowerdata(dentry, &realpath); if (realpath.dentry) { - err = vfs_getattr(&realpath, &lowerdatastat, - lowermask, flags); + err = ovl_do_getattr(&realpath, &lowerdatastat, + lowermask, flags); if (err) goto out; } else { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index ca88b2636a57..05c3dd597fa8 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -408,6 +408,14 @@ static inline bool ovl_open_flags_need_copy_up(int flags) return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)); } +static inline int ovl_do_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + if (flags & AT_GETATTR_NOSEC) + return vfs_getattr_nosec(path, stat, request_mask, flags); + return vfs_getattr(path, stat, request_mask, flags); +} + /* util.c */ int ovl_get_write_access(struct dentry *dentry); void ovl_put_write_access(struct dentry *dentry); diff --git a/fs/stat.c b/fs/stat.c index 24bb0209e459..f721d26ec3f7 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -133,7 +133,8 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, idmap = mnt_idmap(path->mnt); if (inode->i_op->getattr) return inode->i_op->getattr(idmap, path, stat, - request_mask, query_flags); + request_mask, + query_flags | AT_GETATTR_NOSEC); generic_fillattr(idmap, request_mask, inode, stat); return 0; @@ -166,6 +167,9 @@ int vfs_getattr(const struct path *path, struct kstat *stat, { int retval; + if (WARN_ON_ONCE(query_flags & AT_GETATTR_NOSEC)) + return -EPERM; + retval = security_inode_getattr(path); if (retval) return retval; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3dc47937da5d..3beb470f1892 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -569,6 +569,14 @@ extern void xfs_setup_inode(struct xfs_inode *ip); extern void xfs_setup_iops(struct xfs_inode *ip); extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init); +static inline void xfs_update_stable_writes(struct xfs_inode *ip) +{ + if (bdev_stable_writes(xfs_inode_buftarg(ip)->bt_bdev)) + mapping_set_stable_writes(VFS_I(ip)->i_mapping); + else + mapping_clear_stable_writes(VFS_I(ip)->i_mapping); +} + /* * When setting up a newly allocated inode, we need to call * xfs_finish_inode_setup() once the inode is fully instantiated at diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a82470e027f7..6c3919687ea6 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1121,23 +1121,25 @@ xfs_ioctl_setattr_xflags( struct fileattr *fa) { struct xfs_mount *mp = ip->i_mount; + bool rtflag = (fa->fsx_xflags & FS_XFLAG_REALTIME); uint64_t i_flags2; - /* Can't change realtime flag if any extents are allocated. */ - if ((ip->i_df.if_nextents || ip->i_delayed_blks) && - XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME)) - return -EINVAL; - - /* If realtime flag is set then must have realtime device */ - if (fa->fsx_xflags & FS_XFLAG_REALTIME) { - if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || - xfs_extlen_to_rtxmod(mp, ip->i_extsize)) + if (rtflag != XFS_IS_REALTIME_INODE(ip)) { + /* Can't change realtime flag if any extents are allocated. */ + if (ip->i_df.if_nextents || ip->i_delayed_blks) return -EINVAL; } - /* Clear reflink if we are actually able to set the rt flag. */ - if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip)) - ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; + if (rtflag) { + /* If realtime flag is set then must have realtime device */ + if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || + xfs_extlen_to_rtxmod(mp, ip->i_extsize)) + return -EINVAL; + + /* Clear reflink if we are actually able to set the rt flag. */ + if (xfs_is_reflink_inode(ip)) + ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; + } /* diflags2 only valid for v3 inodes. */ i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); @@ -1148,6 +1150,14 @@ xfs_ioctl_setattr_xflags( ip->i_diflags2 = i_flags2; xfs_diflags_to_iflags(ip, false); + + /* + * Make the stable writes flag match that of the device the inode + * resides on when flipping the RT flag. + */ + if (rtflag != XFS_IS_REALTIME_INODE(ip) && S_ISREG(VFS_I(ip)->i_mode)) + xfs_update_stable_writes(ip); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index fdfda4fba12b..a0d77f5f512e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1298,6 +1298,13 @@ xfs_setup_inode( gfp_mask = mapping_gfp_mask(inode->i_mapping); mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); + /* + * For real-time inodes update the stable write flags to that of the RT + * device instead of the data device. + */ + if (S_ISREG(inode->i_mode) && XFS_IS_REALTIME_INODE(ip)) + xfs_update_stable_writes(ip); + /* * If there is no attribute fork no ACL can exist on this inode, * and it can't have any file capabilities attached to it either. diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bcc1ea44b4e8..06142ff7f9ce 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -204,6 +204,8 @@ enum mapping_flags { AS_NO_WRITEBACK_TAGS = 5, AS_LARGE_FOLIO_SUPPORT = 6, AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */ + AS_STABLE_WRITES, /* must wait for writeback before modifying + folio contents */ }; /** @@ -289,6 +291,21 @@ static inline void mapping_clear_release_always(struct address_space *mapping) clear_bit(AS_RELEASE_ALWAYS, &mapping->flags); } +static inline bool mapping_stable_writes(const struct address_space *mapping) +{ + return test_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_set_stable_writes(struct address_space *mapping) +{ + set_bit(AS_STABLE_WRITES, &mapping->flags); +} + +static inline void mapping_clear_stable_writes(struct address_space *mapping) +{ + clear_bit(AS_STABLE_WRITES, &mapping->flags); +} + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->gfp_mask; diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 6c80f96049bd..282e90aeb163 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -116,5 +116,8 @@ #define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to compare object identity and may not be usable to open_by_handle_at(2) */ +#if defined(__KERNEL__) +#define AT_GETATTR_NOSEC 0x80000000 +#endif #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/lib/iov_iter.c b/lib/iov_iter.c index de7d11cf4c63..8ff6824a1005 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -409,7 +409,7 @@ size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t byte void *kaddr = kmap_local_page(page); size_t n = min(bytes, (size_t)PAGE_SIZE - offset); - n = iterate_and_advance(i, bytes, kaddr, + n = iterate_and_advance(i, n, kaddr + offset, copy_to_user_iter_nofault, memcpy_to_iter); kunmap_local(kaddr); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 46f2f5d3d183..ee2fd6a6af40 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -3107,7 +3107,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable); */ void folio_wait_stable(struct folio *folio) { - if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES) + if (mapping_stable_writes(folio_mapping(folio))) folio_wait_writeback(folio); } EXPORT_SYMBOL_GPL(folio_wait_stable);