diff --git a/block/bio.c b/block/bio.c index b9642a41f286..c9223e9d31da 100644 --- a/block/bio.c +++ b/block/bio.c @@ -251,6 +251,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, bio->bi_opf = opf; bio->bi_flags = 0; bio->bi_ioprio = 0; + bio->bi_write_hint = 0; bio->bi_status = 0; bio->bi_iter.bi_sector = 0; bio->bi_iter.bi_size = 0; @@ -813,6 +814,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp) { bio_set_flag(bio, BIO_CLONED); bio->bi_ioprio = bio_src->bi_ioprio; + bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter = bio_src->bi_iter; if (bio->bi_bdev) { diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index e6468eab2681..b1e7415f8439 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -172,6 +172,7 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src) if (bio_flagged(bio_src, BIO_REMAPPED)) bio_set_flag(bio, BIO_REMAPPED); bio->bi_ioprio = bio_src->bi_ioprio; + bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; diff --git a/block/blk-merge.c b/block/blk-merge.c index 2d470cf2173e..2a06fd33039d 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -810,6 +810,10 @@ static struct request *attempt_merge(struct request_queue *q, if (rq_data_dir(req) != rq_data_dir(next)) return NULL; + /* Don't merge requests with different write hints. */ + if (req->write_hint != next->write_hint) + return NULL; + if (req->ioprio != next->ioprio) return NULL; @@ -937,6 +941,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (!bio_crypt_rq_ctx_compatible(rq, bio)) return false; + /* Don't merge requests with different write hints. */ + if (rq->write_hint != bio->bi_write_hint) + return false; + if (rq->ioprio != bio_prio(bio)) return false; diff --git a/block/blk-mq.c b/block/blk-mq.c index aa87fcfda1ec..34ceb15d2ea4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2585,6 +2585,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, rq->cmd_flags |= REQ_FAILFAST_MASK; rq->__sector = bio->bi_iter.bi_sector; + rq->write_hint = bio->bi_write_hint; blk_rq_bio_prep(rq, bio, nr_segs); /* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */ @@ -3185,6 +3186,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, } rq->nr_phys_segments = rq_src->nr_phys_segments; rq->ioprio = rq_src->ioprio; + rq->write_hint = rq_src->write_hint; if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0) goto free_and_out; diff --git a/block/bounce.c b/block/bounce.c index 7cfcb242f9a1..d6a5219f29dd 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -169,6 +169,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src) if (bio_flagged(bio_src, BIO_REMAPPED)) bio_set_flag(bio, BIO_REMAPPED); bio->bi_ioprio = bio_src->bi_ioprio; + bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; diff --git a/block/fops.c b/block/fops.c index 93bae17ce660..63566a3db186 100644 --- a/block/fops.c +++ b/block/fops.c @@ -73,6 +73,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); } bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT; + bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint; bio.bi_ioprio = iocb->ki_ioprio; ret = bio_iov_iter_get_pages(&bio, iter); @@ -203,6 +204,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, for (;;) { bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; + bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint; bio->bi_private = dio; bio->bi_end_io = blkdev_bio_end_io; bio->bi_ioprio = iocb->ki_ioprio; @@ -321,6 +323,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, dio->flags = 0; dio->iocb = iocb; bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; + bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint; bio->bi_end_io = blkdev_bio_end_io_async; bio->bi_ioprio = iocb->ki_ioprio; diff --git a/fs/buffer.c b/fs/buffer.c index d3bcf601d3e5..eb7d3ded2c33 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -55,7 +55,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, - struct writeback_control *wbc); + enum rw_hint hint, struct writeback_control *wbc); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) @@ -1889,7 +1889,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc); + submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, + inode->i_write_hint, wbc); nr_underway++; } bh = next; @@ -1944,7 +1945,8 @@ recover: struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc); + submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, + inode->i_write_hint, wbc); nr_underway++; } bh = next; @@ -2756,6 +2758,7 @@ static void end_bio_bh_io_sync(struct bio *bio) } static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, + enum rw_hint write_hint, struct writeback_control *wbc) { const enum req_op op = opf & REQ_OP_MASK; @@ -2783,6 +2786,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_write_hint = write_hint; __bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); @@ -2802,7 +2806,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, void submit_bh(blk_opf_t opf, struct buffer_head *bh) { - submit_bh_wbc(opf, bh, NULL); + submit_bh_wbc(opf, bh, WRITE_LIFE_NOT_SET, NULL); } EXPORT_SYMBOL(submit_bh); diff --git a/fs/direct-io.c b/fs/direct-io.c index 60456263a338..62c97ff9e852 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -410,6 +410,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, bio->bi_end_io = dio_bio_end_io; if (dio->is_pinned) bio_set_flag(bio, BIO_PAGE_PINNED); + bio->bi_write_hint = file_inode(dio->iocb->ki_filp)->i_write_hint; + sdio->bio = bio; sdio->logical_offset_in_bio = sdio->cur_page_fs_offset; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 65294e3b0bef..01fde6d44bf6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/fs/fcntl.c b/fs/fcntl.c index c80a6acad742..8eb6d64a985b 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -268,8 +269,15 @@ static int f_getowner_uids(struct file *filp, unsigned long arg) } #endif -static bool rw_hint_valid(enum rw_hint hint) +static bool rw_hint_valid(u64 hint) { + BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET); + BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE); + BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT); + BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM); + BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG); + BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME); + switch (hint) { case RWH_WRITE_LIFE_NOT_SET: case RWH_WRITE_LIFE_NONE: @@ -283,34 +291,40 @@ static bool rw_hint_valid(enum rw_hint hint) } } -static long fcntl_rw_hint(struct file *file, unsigned int cmd, - unsigned long arg) +static long fcntl_get_rw_hint(struct file *file, unsigned int cmd, + unsigned long arg) { struct inode *inode = file_inode(file); u64 __user *argp = (u64 __user *)arg; - enum rw_hint hint; - u64 h; + u64 hint = READ_ONCE(inode->i_write_hint); - switch (cmd) { - case F_GET_RW_HINT: - h = inode->i_write_hint; - if (copy_to_user(argp, &h, sizeof(*argp))) - return -EFAULT; - return 0; - case F_SET_RW_HINT: - if (copy_from_user(&h, argp, sizeof(h))) - return -EFAULT; - hint = (enum rw_hint) h; - if (!rw_hint_valid(hint)) - return -EINVAL; + if (copy_to_user(argp, &hint, sizeof(*argp))) + return -EFAULT; + return 0; +} - inode_lock(inode); - inode->i_write_hint = hint; - inode_unlock(inode); - return 0; - default: +static long fcntl_set_rw_hint(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inode *inode = file_inode(file); + u64 __user *argp = (u64 __user *)arg; + u64 hint; + + if (copy_from_user(&hint, argp, sizeof(hint))) + return -EFAULT; + if (!rw_hint_valid(hint)) return -EINVAL; - } + + WRITE_ONCE(inode->i_write_hint, hint); + + /* + * file->f_mapping->host may differ from inode. As an example, + * blkdev_open() modifies file->f_mapping. + */ + if (file->f_mapping->host != inode) + WRITE_ONCE(file->f_mapping->host->i_write_hint, hint); + + return 0; } static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, @@ -416,8 +430,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, err = memfd_fcntl(filp, cmd, argi); break; case F_GET_RW_HINT: + err = fcntl_get_rw_hint(filp, cmd, arg); + break; case F_SET_RW_HINT: - err = fcntl_rw_hint(filp, cmd, arg); + err = fcntl_set_rw_hint(filp, cmd, arg); break; default: break; diff --git a/fs/inode.c b/fs/inode.c index 91048c4c9c9e..1aba6c0bf26a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "internal.h" diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index ae4e2026e59e..4e8e41c8b3c0 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1690,6 +1690,7 @@ static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc, bio->bi_iter.bi_sector = iomap_sector(&wpc->iomap, pos); bio->bi_end_io = iomap_writepage_end_bio; wbc_init_bio(wbc, bio); + bio->bi_write_hint = inode->i_write_hint; ioend = iomap_ioend_from_bio(bio); INIT_LIST_HEAD(&ioend->io_list); diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index bcd3f8cf5ea4..f3b43d223a46 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -380,6 +380,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, GFP_KERNEL); bio->bi_iter.bi_sector = iomap_sector(iomap, pos); + bio->bi_write_hint = inode->i_write_hint; bio->bi_ioprio = dio->iocb->ki_ioprio; bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; diff --git a/fs/mpage.c b/fs/mpage.c index 738882e0766d..fa8b99a199fa 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -605,6 +605,7 @@ alloc_new: GFP_NOFS); bio->bi_iter.bi_sector = first_block << (blkbits - 9); wbc_init_bio(wbc, bio); + bio->bi_write_hint = inode->i_write_hint; } /* diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7a8150a5f051..492b0128b5d9 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -8,6 +8,7 @@ #include #include #include +#include struct blk_mq_tags; struct blk_flush_queue; @@ -135,6 +136,7 @@ struct request { struct blk_crypto_keyslot *crypt_keyslot; #endif + enum rw_hint write_hint; unsigned short ioprio; enum mq_rq_state state; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index f288c94374b3..12d87cef2c03 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -10,6 +10,7 @@ #include #include #include +#include struct bio_set; struct bio; @@ -269,6 +270,7 @@ struct bio { */ unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; + enum rw_hint bi_write_hint; blk_status_t bi_status; atomic_t __bi_remaining; diff --git a/include/linux/fs.h b/include/linux/fs.h index ed5966a70495..bdabda5dc364 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -309,19 +310,6 @@ struct address_space; struct writeback_control; struct readahead_control; -/* - * Write life time hint values. - * Stored in struct inode as u8. - */ -enum rw_hint { - WRITE_LIFE_NOT_SET = 0, - WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, - WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, - WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, - WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, - WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, -}; - /* Match RWF_* bits to IOCB bits */ #define IOCB_HIPRI (__force int) RWF_HIPRI #define IOCB_DSYNC (__force int) RWF_DSYNC @@ -677,7 +665,7 @@ struct inode { spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; - u8 i_write_hint; + enum rw_hint i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h new file mode 100644 index 000000000000..309ca72f2dfb --- /dev/null +++ b/include/linux/rw_hint.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_RW_HINT_H +#define _LINUX_RW_HINT_H + +#include +#include +#include + +/* Block storage write lifetime hint values. */ +enum rw_hint { + WRITE_LIFE_NOT_SET = RWH_WRITE_LIFE_NOT_SET, + WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE, + WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT, + WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM, + WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG, + WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME, +} __packed; + +/* Sparse ignores __packed annotations on enums, hence the #ifndef below. */ +#ifndef __CHECKER__ +static_assert(sizeof(enum rw_hint) == 1); +#endif + +#endif /* _LINUX_RW_HINT_H */