From c10e8558d49d4ec62d78af1cf2852a1640bea9f7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 12 Jan 2024 09:11:13 -0800 Subject: [PATCH 01/60] f2fs: remove unnecessary f2fs_put_page in f2fs_rename [1] changed the below condition, which made f2fs_put_page() voided. This patch reapplies the AL's resolution in -next from [2]. - if (S_ISDIR(old_inode->i_mode)) { + if (old_is_dir && old_dir != new_dir) { old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); if (!old_dir_entry) { if (IS_ERR(old_dir_page)) [1] 7deee77b993a ("f2fs: Avoid reading renamed directory if parent does not change") [2] https://lore.kernel.org/all/20231220013402.GW1674809@ZenIV/ Suggested-by: Al Viro Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b3bb815fc6aa..ba11298b7837 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1105,14 +1105,11 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, iput(whiteout); } - if (old_is_dir) { - if (old_dir_entry) - f2fs_set_link(old_inode, old_dir_entry, - old_dir_page, new_dir); - else - f2fs_put_page(old_dir_page, 0); + if (old_dir_entry) + f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); + if (old_is_dir) f2fs_i_links_write(old_dir, false); - } + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) { f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO); if (S_ISDIR(old_inode->i_mode)) From 0d8c7542f93a06e68c05a1a8ad47c52ea7cc3e95 Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Thu, 28 Dec 2023 20:25:07 -0700 Subject: [PATCH 02/60] f2fs: check free sections before disable checkpoint 'f2fs_is_checkpoint_ready()' checks free sections. If there is not enough free sections, most f2fs operations will return -ENOSPC when checkpoint is disabled. It would be better to check free sections before disable checkpoint. Signed-off-by: Wu Bo Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4c8836ded90f..f0f12b1eddc8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -906,6 +906,8 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable) if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && dirty_segments(sbi) > ovp_hole_segs) return -EAGAIN; + if (has_not_enough_free_secs(sbi, 0, 0)) + return -EAGAIN; return 0; } From 8a430dd49e9cb021372b0ad91e60aeef9c6ced00 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 13 Jan 2024 03:41:27 +0800 Subject: [PATCH 03/60] f2fs: compress: fix to guarantee persisting compressed blocks by CP If data block in compressed cluster is not persisted with metadata during checkpoint, after SPOR, the data may be corrupted, let's guarantee to write compressed page by checkpoint. 
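As a rough, illustrative model of the accounting change (not the exact kernel macro; the real change is the two-argument WB_DATA_TYPE() below): the writeback count type is now derived from both the checkpoint-guarantee check and a "this is a compressed page" flag, so compressed pages are charged to F2FS_WB_CP_DATA and are waited on at checkpoint time.

    #include <stdbool.h>

    enum count_type { F2FS_WB_CP_DATA, F2FS_WB_DATA };

    /* stand-in for WB_DATA_TYPE(p, f): compressed pages always count as CP data */
    static enum count_type wb_data_type(bool cp_guaranteed, bool compressed)
    {
            return (compressed || cp_guaranteed) ? F2FS_WB_CP_DATA : F2FS_WB_DATA;
    }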
Fixes: 4c8ff7095bef ("f2fs: support data compression") Reviewed-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 4 +++- fs/f2fs/data.c | 17 +++++++++-------- fs/f2fs/f2fs.h | 4 +++- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 531517dac079..3a8d8a213b40 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1418,6 +1418,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) struct f2fs_sb_info *sbi = bio->bi_private; struct compress_io_ctx *cic = (struct compress_io_ctx *)page_private(page); + enum count_type type = WB_DATA_TYPE(page, + f2fs_is_compressed_page(page)); int i; if (unlikely(bio->bi_status)) @@ -1425,7 +1427,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) f2fs_compress_free_page(page); - dec_page_count(sbi, F2FS_WB_DATA); + dec_page_count(sbi, type); if (atomic_dec_return(&cic->pending_pages)) return; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 26e317696b33..d00e92b6c902 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -48,7 +48,7 @@ void f2fs_destroy_bioset(void) bioset_exit(&f2fs_bioset); } -static bool __is_cp_guaranteed(struct page *page) +bool f2fs_is_cp_guaranteed(struct page *page) { struct address_space *mapping = page->mapping; struct inode *inode; @@ -65,8 +65,6 @@ static bool __is_cp_guaranteed(struct page *page) S_ISDIR(inode->i_mode)) return true; - if (f2fs_is_compressed_page(page)) - return false; if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) || page_private_gcing(page)) return true; @@ -338,7 +336,7 @@ static void f2fs_write_end_io(struct bio *bio) bio_for_each_segment_all(bvec, bio, iter_all) { struct page *page = bvec->bv_page; - enum count_type type = WB_DATA_TYPE(page); + enum count_type type = WB_DATA_TYPE(page, false); if (page_private_dummy(page)) { clear_page_private_dummy(page); @@ -762,7 +760,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? 
- __read_io_type(page) : WB_DATA_TYPE(fio->page)); + __read_io_type(page) : WB_DATA_TYPE(fio->page, false)); if (is_read_io(bio_op(bio))) f2fs_submit_read_bio(fio->sbi, bio, fio->type); @@ -973,7 +971,7 @@ alloc_new: if (fio->io_wbc) wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); - inc_page_count(fio->sbi, WB_DATA_TYPE(page)); + inc_page_count(fio->sbi, WB_DATA_TYPE(page, false)); *fio->last_block = fio->new_blkaddr; *fio->bio = bio; @@ -1007,6 +1005,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; struct page *bio_page; + enum count_type type; f2fs_bug_on(sbi, is_read_io(fio->op)); @@ -1046,7 +1045,8 @@ next: /* set submitted = true as a return value */ fio->submitted = 1; - inc_page_count(sbi, WB_DATA_TYPE(bio_page)); + type = WB_DATA_TYPE(bio_page, fio->compressed_page); + inc_page_count(sbi, type); if (io->bio && (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, @@ -1059,7 +1059,8 @@ alloc_new: if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE) && fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { - dec_page_count(sbi, WB_DATA_TYPE(bio_page)); + dec_page_count(sbi, WB_DATA_TYPE(bio_page, + fio->compressed_page)); fio->retry = 1; goto skip; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 65294e3b0bef..50f3d546ded8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1080,7 +1080,8 @@ struct f2fs_sm_info { * f2fs monitors the number of several block types such as on-writeback, * dirty dentry blocks, dirty node blocks, and dirty meta blocks. */ -#define WB_DATA_TYPE(p) (__is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) +#define WB_DATA_TYPE(p, f) \ + (f || f2fs_is_cp_guaranteed(p) ? F2FS_WB_CP_DATA : F2FS_WB_DATA) enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, @@ -3804,6 +3805,7 @@ void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi); */ int __init f2fs_init_bioset(void); void f2fs_destroy_bioset(void); +bool f2fs_is_cp_guaranteed(struct page *page); int f2fs_init_bio_entry_cache(void); void f2fs_destroy_bio_entry_cache(void); void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, From fd244524c2cf07b5f4c3fe8abd6a99225c76544b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 13 Jan 2024 03:41:28 +0800 Subject: [PATCH 04/60] f2fs: compress: fix to cover normal cluster write with cp_rwsem When we overwrite compressed cluster w/ normal cluster, we should not unlock cp_rwsem during f2fs_write_raw_pages(), otherwise data will be corrupted if partial blocks were persisted before CP & SPOR, due to cluster metadata wasn't updated atomically. 
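In outline, the locking added below has the following shape (a simplified sketch of f2fs_write_raw_pages(); the per-page write loop and error handling are elided):

    compr_blocks = f2fs_compressed_blocks(cc);

    /* a normal write is overwriting a compressed cluster: hold cp_rwsem so
     * the cluster metadata and data blocks are updated under one checkpoint
     */
    if (compr_blocks > 0)
            f2fs_lock_op(sbi);

    /* ... f2fs_write_single_data_page() on each dirty rpage ... */

    if (compr_blocks > 0)
            f2fs_unlock_op(sbi);
    f2fs_balance_fs(sbi, true);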
Fixes: 4c8ff7095bef ("f2fs: support data compression") Reviewed-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 27 ++++++++++++++++++--------- fs/f2fs/data.c | 3 ++- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 3a8d8a213b40..ff26b49c0d71 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1443,12 +1443,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) } static int f2fs_write_raw_pages(struct compress_ctx *cc, - int *submitted, + int *submitted_p, struct writeback_control *wbc, enum iostat_type io_type) { struct address_space *mapping = cc->inode->i_mapping; - int _submitted, compr_blocks, ret, i; + struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + int submitted, compr_blocks, i; + int ret = 0; compr_blocks = f2fs_compressed_blocks(cc); @@ -1463,6 +1465,10 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, if (compr_blocks < 0) return compr_blocks; + /* overwrite compressed cluster w/ normal cluster */ + if (compr_blocks > 0) + f2fs_lock_op(sbi); + for (i = 0; i < cc->cluster_size; i++) { if (!cc->rpages[i]) continue; @@ -1487,7 +1493,7 @@ continue_unlock: if (!clear_page_dirty_for_io(cc->rpages[i])) goto continue_unlock; - ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, + ret = f2fs_write_single_data_page(cc->rpages[i], &submitted, NULL, NULL, wbc, io_type, compr_blocks, false); if (ret) { @@ -1495,26 +1501,29 @@ continue_unlock: unlock_page(cc->rpages[i]); ret = 0; } else if (ret == -EAGAIN) { + ret = 0; /* * for quota file, just redirty left pages to * avoid deadlock caused by cluster update race * from foreground operation. */ if (IS_NOQUOTA(cc->inode)) - return 0; - ret = 0; + goto out; f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); goto retry_write; } - return ret; + goto out; } - *submitted += _submitted; + *submitted_p += submitted; } - f2fs_balance_fs(F2FS_M_SB(mapping), true); +out: + if (compr_blocks > 0) + f2fs_unlock_op(sbi); - return 0; + f2fs_balance_fs(sbi, true); + return ret; } int f2fs_write_multi_pages(struct compress_ctx *cc, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d00e92b6c902..7a93a99fbd04 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2839,7 +2839,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .encrypted_page = NULL, .submitted = 0, .compr_blocks = compr_blocks, - .need_lock = LOCK_RETRY, + .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY, .post_read = f2fs_post_read_required(inode) ? 1 : 0, .io_type = io_type, .io_wbc = wbc, @@ -2920,6 +2920,7 @@ write: if (err == -EAGAIN) { err = f2fs_do_write_data_page(&fio); if (err == -EAGAIN) { + f2fs_bug_on(sbi, compr_blocks); fio.need_lock = LOCK_REQ; err = f2fs_do_write_data_page(&fio); } From eb8fbaa53374e0a2d4381190abfe708481517bbb Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Sat, 13 Jan 2024 03:41:29 +0800 Subject: [PATCH 05/60] f2fs: compress: fix to check unreleased compressed cluster Compressed cluster may not be released due to we can fail in release_compress_blocks(), fix to handle reserved compressed cluster correctly in reserve_compress_blocks(). 
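To summarize the states the updated check has to handle (sketch of the loop body in reserve_compress_blocks(); see the hunk below):

    /* a data block of the cluster can be in one of three states here:
     *   valid blkaddr -> still allocated, counts as a compressed block
     *   NEW_ADDR      -> already reserved, left behind because an earlier
     *                    release_compress_blocks() failed part way
     *   NULL_ADDR     -> actually needs a new reservation
     */
    if (blkaddr == NEW_ADDR || __is_valid_data_blkaddr(blkaddr)) {
            compr_blocks++;
            continue;
    }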
Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Sheng Yong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b58ab1157b7e..941e02c0953c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3624,7 +3624,13 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) goto next; } - if (__is_valid_data_blkaddr(blkaddr)) { + /* + * compressed cluster was not released due to it + * fails in release_compress_blocks(), so NEW_ADDR + * is a possible case. + */ + if (blkaddr == NEW_ADDR || + __is_valid_data_blkaddr(blkaddr)) { compr_blocks++; continue; } @@ -3633,6 +3639,11 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) } reserved = cluster_size - compr_blocks; + + /* for the case all blocks in cluster were reserved */ + if (reserved == 1) + goto next; + ret = inc_valid_block_count(sbi, dn->inode, &reserved); if (ret) return ret; From 54607494875edd636aff3c21ace3ad9a7da758a9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 13 Jan 2024 03:41:30 +0800 Subject: [PATCH 06/60] f2fs: compress: fix to avoid inconsistence bewteen i_blocks and dnode In reserve_compress_blocks(), we update blkaddrs of dnode in prior to inc_valid_block_count(), it may cause inconsistent status bewteen i_blocks and blkaddrs once inc_valid_block_count() fails. To fix this issue, it needs to reverse their invoking order. Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS") Reviewed-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++-- fs/f2fs/f2fs.h | 7 ++++++- fs/f2fs/file.c | 26 ++++++++++++++------------ fs/f2fs/segment.c | 2 +- 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7a93a99fbd04..65fe48bb17d1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1219,7 +1219,8 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) + err = inc_valid_block_count(sbi, dn->inode, &count, true); + if (unlikely(err)) return err; trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, @@ -1476,7 +1477,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) dn->data_blkaddr = f2fs_data_blkaddr(dn); if (dn->data_blkaddr == NULL_ADDR) { - err = inc_valid_block_count(sbi, dn->inode, &count); + err = inc_valid_block_count(sbi, dn->inode, &count, true); if (unlikely(err)) return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 50f3d546ded8..69e71460a950 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2252,7 +2252,7 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, - struct inode *inode, blkcnt_t *count) + struct inode *inode, blkcnt_t *count, bool partial) { blkcnt_t diff = 0, release = 0; block_t avail_user_block_count; @@ -2292,6 +2292,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, avail_user_block_count = 0; } if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { + if (!partial) { + spin_unlock(&sbi->stat_lock); + goto enospc; + } + diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) diff = *count; diff 
--git a/fs/f2fs/file.c b/fs/f2fs/file.c index 941e02c0953c..1ff1c45e1927 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3614,14 +3614,16 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) blkcnt_t reserved; int ret; - for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { - blkaddr = f2fs_data_blkaddr(dn); + for (i = 0; i < cluster_size; i++) { + blkaddr = data_blkaddr(dn->inode, dn->node_page, + dn->ofs_in_node + i); if (i == 0) { - if (blkaddr == COMPRESS_ADDR) - continue; - dn->ofs_in_node += cluster_size; - goto next; + if (blkaddr != COMPRESS_ADDR) { + dn->ofs_in_node += cluster_size; + goto next; + } + continue; } /* @@ -3634,8 +3636,6 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) compr_blocks++; continue; } - - f2fs_set_data_blkaddr(dn, NEW_ADDR); } reserved = cluster_size - compr_blocks; @@ -3644,12 +3644,14 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) if (reserved == 1) goto next; - ret = inc_valid_block_count(sbi, dn->inode, &reserved); - if (ret) + ret = inc_valid_block_count(sbi, dn->inode, &reserved, false); + if (unlikely(ret)) return ret; - if (reserved != cluster_size - compr_blocks) - return -ENOSPC; + for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) { + if (f2fs_data_blkaddr(dn) == NULL_ADDR) + f2fs_set_data_blkaddr(dn, NEW_ADDR); + } f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f0f12b1eddc8..7901ede58113 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -248,7 +248,7 @@ retry: } else { blkcnt_t count = 1; - err = inc_valid_block_count(sbi, inode, &count); + err = inc_valid_block_count(sbi, inode, &count, true); if (err) { f2fs_put_dnode(&dn); return err; From b896e302f79678451a94769ddd9e52e954c64fbb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 13 Jan 2024 03:41:31 +0800 Subject: [PATCH 07/60] f2fs: fix to remove unnecessary f2fs_bug_on() to avoid panic verify_blkaddr() will trigger panic once we inject fault into f2fs_is_valid_blkaddr(), fix to remove this unnecessary f2fs_bug_on(). Fixes: 18792e64c86d ("f2fs: support fault injection for f2fs_is_valid_blkaddr()") Reviewed-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 69e71460a950..ab710bb6d8b3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3470,11 +3470,9 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, static inline void verify_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) { + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.", blkaddr, type); - f2fs_bug_on(sbi, 1); - } } static inline bool __is_valid_data_blkaddr(block_t blkaddr) From c7115e094ca820bb72e0c89f158d16bc48c6fa04 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 13 Jan 2024 03:41:32 +0800 Subject: [PATCH 08/60] f2fs: introduce FAULT_BLKADDR_CONSISTENCE We will encounter below inconsistent status when FAULT_BLKADDR type fault injection is on. 
Info: checkpoint state = d6 : nat_bits crc fsck compacted_summary orphan_inodes sudden-power-off [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c100 has i_blocks: 000000c0, but has 191 blocks [FIX] (fsck_chk_inode_blk:1260) --> [0x1c100] i_blocks=0x000000c0 -> 0xbf [FIX] (fsck_chk_inode_blk:1269) --> [0x1c100] i_compr_blocks=0x00000026 -> 0x27 [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1cadb has i_blocks: 0000002f, but has 46 blocks [FIX] (fsck_chk_inode_blk:1260) --> [0x1cadb] i_blocks=0x0000002f -> 0x2e [FIX] (fsck_chk_inode_blk:1269) --> [0x1cadb] i_compr_blocks=0x00000011 -> 0x12 [ASSERT] (fsck_chk_inode_blk:1254) --> ino: 0x1c62c has i_blocks: 00000002, but has 1 blocks [FIX] (fsck_chk_inode_blk:1260) --> [0x1c62c] i_blocks=0x00000002 -> 0x1 After we inject fault into f2fs_is_valid_blkaddr() during truncation, a) it missed to increase @nr_free or @valid_blocks b) it can cause in blkaddr leak in truncated dnode Which may cause inconsistent status. This patch separates FAULT_BLKADDR_CONSISTENCE from FAULT_BLKADDR, and rename FAULT_BLKADDR to FAULT_BLKADDR_VALIDITY so that we can: a) use FAULT_BLKADDR_CONSISTENCE in f2fs_truncate_data_blocks_range() to simulate inconsistent issue independently, then it can verify fsck repair flow. b) FAULT_BLKADDR_VALIDITY fault will not cause any inconsistent status, we can just use it to check error path handling in kernel side. Reviewed-by: Daeho Jeong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 47 +++++++++++++------------ Documentation/filesystems/f2fs.rst | 47 +++++++++++++------------ fs/f2fs/checkpoint.c | 19 +++++++--- fs/f2fs/f2fs.h | 5 ++- fs/f2fs/file.c | 8 +++-- fs/f2fs/super.c | 37 +++++++++---------- 6 files changed, 92 insertions(+), 71 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 99fa87a43926..48c135e24eb5 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -701,29 +701,30 @@ Description: Support configuring fault injection type, should be enabled with fault_injection option, fault type value is shown below, it supports single or combined type. 
- =================== =========== - Type_Name Type_Value - =================== =========== - FAULT_KMALLOC 0x000000001 - FAULT_KVMALLOC 0x000000002 - FAULT_PAGE_ALLOC 0x000000004 - FAULT_PAGE_GET 0x000000008 - FAULT_ALLOC_BIO 0x000000010 (obsolete) - FAULT_ALLOC_NID 0x000000020 - FAULT_ORPHAN 0x000000040 - FAULT_BLOCK 0x000000080 - FAULT_DIR_DEPTH 0x000000100 - FAULT_EVICT_INODE 0x000000200 - FAULT_TRUNCATE 0x000000400 - FAULT_READ_IO 0x000000800 - FAULT_CHECKPOINT 0x000001000 - FAULT_DISCARD 0x000002000 - FAULT_WRITE_IO 0x000004000 - FAULT_SLAB_ALLOC 0x000008000 - FAULT_DQUOT_INIT 0x000010000 - FAULT_LOCK_OP 0x000020000 - FAULT_BLKADDR 0x000040000 - =================== =========== + =========================== =========== + Type_Name Type_Value + =========================== =========== + FAULT_KMALLOC 0x000000001 + FAULT_KVMALLOC 0x000000002 + FAULT_PAGE_ALLOC 0x000000004 + FAULT_PAGE_GET 0x000000008 + FAULT_ALLOC_BIO 0x000000010 (obsolete) + FAULT_ALLOC_NID 0x000000020 + FAULT_ORPHAN 0x000000040 + FAULT_BLOCK 0x000000080 + FAULT_DIR_DEPTH 0x000000100 + FAULT_EVICT_INODE 0x000000200 + FAULT_TRUNCATE 0x000000400 + FAULT_READ_IO 0x000000800 + FAULT_CHECKPOINT 0x000001000 + FAULT_DISCARD 0x000002000 + FAULT_WRITE_IO 0x000004000 + FAULT_SLAB_ALLOC 0x000008000 + FAULT_DQUOT_INIT 0x000010000 + FAULT_LOCK_OP 0x000020000 + FAULT_BLKADDR_VALIDITY 0x000040000 + FAULT_BLKADDR_CONSISTENCE 0x000080000 + =========================== =========== What: /sys/fs/f2fs//discard_io_aware_gran Date: January 2023 diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index d32c6209685d..32cbfa864f38 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -184,29 +184,30 @@ fault_type=%d Support configuring fault injection type, should be enabled with fault_injection option, fault type value is shown below, it supports single or combined type. - =================== =========== - Type_Name Type_Value - =================== =========== - FAULT_KMALLOC 0x000000001 - FAULT_KVMALLOC 0x000000002 - FAULT_PAGE_ALLOC 0x000000004 - FAULT_PAGE_GET 0x000000008 - FAULT_ALLOC_BIO 0x000000010 (obsolete) - FAULT_ALLOC_NID 0x000000020 - FAULT_ORPHAN 0x000000040 - FAULT_BLOCK 0x000000080 - FAULT_DIR_DEPTH 0x000000100 - FAULT_EVICT_INODE 0x000000200 - FAULT_TRUNCATE 0x000000400 - FAULT_READ_IO 0x000000800 - FAULT_CHECKPOINT 0x000001000 - FAULT_DISCARD 0x000002000 - FAULT_WRITE_IO 0x000004000 - FAULT_SLAB_ALLOC 0x000008000 - FAULT_DQUOT_INIT 0x000010000 - FAULT_LOCK_OP 0x000020000 - FAULT_BLKADDR 0x000040000 - =================== =========== + =========================== =========== + Type_Name Type_Value + =========================== =========== + FAULT_KMALLOC 0x000000001 + FAULT_KVMALLOC 0x000000002 + FAULT_PAGE_ALLOC 0x000000004 + FAULT_PAGE_GET 0x000000008 + FAULT_ALLOC_BIO 0x000000010 (obsolete) + FAULT_ALLOC_NID 0x000000020 + FAULT_ORPHAN 0x000000040 + FAULT_BLOCK 0x000000080 + FAULT_DIR_DEPTH 0x000000100 + FAULT_EVICT_INODE 0x000000200 + FAULT_TRUNCATE 0x000000400 + FAULT_READ_IO 0x000000800 + FAULT_CHECKPOINT 0x000001000 + FAULT_DISCARD 0x000002000 + FAULT_WRITE_IO 0x000004000 + FAULT_SLAB_ALLOC 0x000008000 + FAULT_DQUOT_INIT 0x000010000 + FAULT_LOCK_OP 0x000020000 + FAULT_BLKADDR_VALIDITY 0x000040000 + FAULT_BLKADDR_CONSISTENCE 0x000080000 + =========================== =========== mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. 
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b0597a539fc5..b85820e70f5e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -170,12 +170,9 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, return exist; } -bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, +static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { - if (time_to_inject(sbi, FAULT_BLKADDR)) - return false; - switch (type) { case META_NAT: break; @@ -230,6 +227,20 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, return true; } +bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + if (time_to_inject(sbi, FAULT_BLKADDR_VALIDITY)) + return false; + return __f2fs_is_valid_blkaddr(sbi, blkaddr, type); +} + +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) +{ + return __f2fs_is_valid_blkaddr(sbi, blkaddr, type); +} + /* * Readahead CP/NAT/SIT/SSA/POR pages */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ab710bb6d8b3..4481f68d6418 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -60,7 +60,8 @@ enum { FAULT_SLAB_ALLOC, FAULT_DQUOT_INIT, FAULT_LOCK_OP, - FAULT_BLKADDR, + FAULT_BLKADDR_VALIDITY, + FAULT_BLKADDR_CONSISTENCE, FAULT_MAX, }; @@ -3768,6 +3769,8 @@ struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index); struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); +bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi, + block_t blkaddr, int type); int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1ff1c45e1927..25b119cf3499 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -590,9 +590,13 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) f2fs_set_data_blkaddr(dn, NULL_ADDR); if (__is_valid_data_blkaddr(blkaddr)) { - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, - DATA_GENERIC_ENHANCE)) + if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE)) continue; + if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr, + DATA_GENERIC_ENHANCE)) { + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + continue; + } if (compressed_cluster) valid_blocks++; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d45ab0992ae5..e2c066fbc0fa 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -44,24 +44,25 @@ static struct kmem_cache *f2fs_inode_cachep; #ifdef CONFIG_F2FS_FAULT_INJECTION const char *f2fs_fault_name[FAULT_MAX] = { - [FAULT_KMALLOC] = "kmalloc", - [FAULT_KVMALLOC] = "kvmalloc", - [FAULT_PAGE_ALLOC] = "page alloc", - [FAULT_PAGE_GET] = "page get", - [FAULT_ALLOC_NID] = "alloc nid", - [FAULT_ORPHAN] = "orphan", - [FAULT_BLOCK] = "no more block", - [FAULT_DIR_DEPTH] = "too big dir depth", - [FAULT_EVICT_INODE] = "evict_inode fail", - [FAULT_TRUNCATE] = "truncate fail", - [FAULT_READ_IO] = "read IO error", - [FAULT_CHECKPOINT] = "checkpoint error", - [FAULT_DISCARD] = "discard error", - [FAULT_WRITE_IO] = "write IO error", - [FAULT_SLAB_ALLOC] = "slab alloc", - [FAULT_DQUOT_INIT] = "dquot initialize", - [FAULT_LOCK_OP] = "lock_op", - [FAULT_BLKADDR] = "invalid blkaddr", + [FAULT_KMALLOC] = "kmalloc", + [FAULT_KVMALLOC] = "kvmalloc", + [FAULT_PAGE_ALLOC] = "page alloc", + [FAULT_PAGE_GET] = "page get", + [FAULT_ALLOC_NID] = "alloc nid", + [FAULT_ORPHAN] = "orphan", + 
[FAULT_BLOCK] = "no more block", + [FAULT_DIR_DEPTH] = "too big dir depth", + [FAULT_EVICT_INODE] = "evict_inode fail", + [FAULT_TRUNCATE] = "truncate fail", + [FAULT_READ_IO] = "read IO error", + [FAULT_CHECKPOINT] = "checkpoint error", + [FAULT_DISCARD] = "discard error", + [FAULT_WRITE_IO] = "write IO error", + [FAULT_SLAB_ALLOC] = "slab alloc", + [FAULT_DQUOT_INIT] = "dquot initialize", + [FAULT_LOCK_OP] = "lock_op", + [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr", + [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr", }; void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, From 536af8211586af09c5bea1c15ad28ddec5f66a97 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 29 Jan 2024 19:27:40 +0800 Subject: [PATCH 09/60] f2fs: zone: fix to wait completion of last bio in zone correctly It needs to check last zone_pending_bio and wait IO completion before traverse next fio in io->io_list, otherwise, bio in next zone may be submitted before all IO completion in current zone. Fixes: e067dc3c6b9c ("f2fs: maintain six open zones for zoned devices") Cc: Daeho Jeong Signed-off-by: Chao Yu Reviewed-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 65fe48bb17d1..ac82e69a9f5f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1010,7 +1010,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) f2fs_bug_on(sbi, is_read_io(fio->op)); f2fs_down_write(&io->io_rwsem); - +next: #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) { wait_for_completion_io(&io->zone_wait); @@ -1020,7 +1020,6 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) } #endif -next: if (fio->in_list) { spin_lock(&io->io_lock); if (list_empty(&io->io_list)) { From c2034ef6192a65a986a45c2aa2ed05824fdc0e9f Mon Sep 17 00:00:00 2001 From: Wenjie Qi Date: Tue, 16 Jan 2024 22:11:38 +0800 Subject: [PATCH 10/60] f2fs: fix NULL pointer dereference in f2fs_submit_page_write() BUG: kernel NULL pointer dereference, address: 0000000000000014 RIP: 0010:f2fs_submit_page_write+0x6cf/0x780 [f2fs] Call Trace: ? show_regs+0x6e/0x80 ? __die+0x29/0x70 ? page_fault_oops+0x154/0x4a0 ? prb_read_valid+0x20/0x30 ? __irq_work_queue_local+0x39/0xd0 ? irq_work_queue+0x36/0x70 ? do_user_addr_fault+0x314/0x6c0 ? exc_page_fault+0x7d/0x190 ? asm_exc_page_fault+0x2b/0x30 ? f2fs_submit_page_write+0x6cf/0x780 [f2fs] ? f2fs_submit_page_write+0x736/0x780 [f2fs] do_write_page+0x50/0x170 [f2fs] f2fs_outplace_write_data+0x61/0xb0 [f2fs] f2fs_do_write_data_page+0x3f8/0x660 [f2fs] f2fs_write_single_data_page+0x5bb/0x7a0 [f2fs] f2fs_write_cache_pages+0x3da/0xbe0 [f2fs] ... It is possible that other threads have added this fio to io->bio and submitted the io->bio before entering f2fs_submit_page_write(). At this point io->bio = NULL. If is_end_zone_blkaddr(sbi, fio->new_blkaddr) of this fio is true, then an NULL pointer dereference error occurs at bio_get(io->bio). The original code for determining zone end was after "out:", which would have missed some fio who is zone end. I've moved this code before "skip:" to make sure it's done for each fio. 
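For readers following the control flow, the reordering amounts to this (heavily abbreviated; labels as in f2fs_submit_page_write(), body of the zone-end branch elided):

    #ifdef CONFIG_BLK_DEV_ZONED
    if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
        is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
            bio_get(io->bio);       /* the reported NULL dereference happened here */
            /* ... close out the open zone ... */
            __submit_merged_bio(io);
    }
    #endif
    skip:
    if (fio->in_list)
            goto next;      /* in-list fios at a zone end no longer bypass the block above */
    out:
    /* shutdown / checkpoint-not-ready handling follows */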
Fixes: e067dc3c6b9c ("f2fs: maintain six open zones for zoned devices") Signed-off-by: Wenjie Qi Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ac82e69a9f5f..05158f89ef32 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1080,10 +1080,6 @@ alloc_new: io->last_block_in_bio = fio->new_blkaddr; trace_f2fs_submit_page_write(fio->page, fio); -skip: - if (fio->in_list) - goto next; -out: #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && is_end_zone_blkaddr(sbi, fio->new_blkaddr)) { @@ -1096,6 +1092,10 @@ out: __submit_merged_bio(io); } #endif +skip: + if (fio->in_list) + goto next; +out: if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || !f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); From b1c9d3f833ba60a288db111d7fe38edfeb9b8fbb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 26 Jan 2024 23:19:16 +0800 Subject: [PATCH 11/60] f2fs: support printk_ratelimited() in f2fs_printk() This patch supports using printk_ratelimited() in f2fs_printk(), and wrap ratelimited f2fs_printk() into f2fs_{err,warn,info}_ratelimited(), then, use these new helps to clean up codes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 10 +++++----- fs/f2fs/dir.c | 5 ++--- fs/f2fs/f2fs.h | 40 +++++++++++++++++++++++----------------- fs/f2fs/super.c | 11 ++++++++--- 4 files changed, 38 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index ff26b49c0d71..0fd839358c15 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -512,8 +512,8 @@ static int lzorle_compress_pages(struct compress_ctx *cc) ret = lzorle1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata, &cc->clen, cc->private); if (ret != LZO_E_OK) { - printk_ratelimited("%sF2FS-fs (%s): lzo-rle compress failed, ret:%d\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret); + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "lzo-rle compress failed, ret:%d", ret); return -EIO; } return 0; @@ -780,9 +780,9 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) if (provided != calculated) { if (!is_inode_flag_set(dic->inode, FI_COMPRESS_CORRUPT)) { set_inode_flag(dic->inode, FI_COMPRESS_CORRUPT); - printk_ratelimited( - "%sF2FS-fs (%s): checksum invalid, nid = %lu, %x vs %x", - KERN_INFO, sbi->sb->s_id, dic->inode->i_ino, + f2fs_info_ratelimited(sbi, + "checksum invalid, nid = %lu, %x vs %x", + dic->inode->i_ino, provided, calculated); } set_sbi_flag(sbi, SBI_NEED_FSCK); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 042593aed1ec..3f20d94e12f9 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -995,9 +995,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, de = &d->dentry[bit_pos]; if (de->name_len == 0) { if (found_valid_dirent || !bit_pos) { - printk_ratelimited( - "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.", - KERN_WARNING, sbi->sb->s_id, + f2fs_warn_ratelimited(sbi, + "invalid namelen(0), ino:%u, run fsck to fix.", le32_to_cpu(de->ino)); set_sbi_flag(sbi, SBI_NEED_FSCK); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4481f68d6418..b4b737e43a6b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1812,6 +1812,27 @@ struct f2fs_sb_info { #endif }; +__printf(3, 4) +void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, const char *fmt, ...); + +#define f2fs_err(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_ERR fmt, ##__VA_ARGS__) +#define f2fs_warn(sbi, fmt, ...) 
\ + f2fs_printk(sbi, false, KERN_WARNING fmt, ##__VA_ARGS__) +#define f2fs_notice(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_NOTICE fmt, ##__VA_ARGS__) +#define f2fs_info(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_INFO fmt, ##__VA_ARGS__) +#define f2fs_debug(sbi, fmt, ...) \ + f2fs_printk(sbi, false, KERN_DEBUG fmt, ##__VA_ARGS__) + +#define f2fs_err_ratelimited(sbi, fmt, ...) \ + f2fs_printk(sbi, true, KERN_ERR fmt, ##__VA_ARGS__) +#define f2fs_warn_ratelimited(sbi, fmt, ...) \ + f2fs_printk(sbi, true, KERN_WARNING fmt, ##__VA_ARGS__) +#define f2fs_info_ratelimited(sbi, fmt, ...) \ + f2fs_printk(sbi, true, KERN_INFO fmt, ##__VA_ARGS__) + #ifdef CONFIG_F2FS_FAULT_INJECTION #define time_to_inject(sbi, type) __time_to_inject(sbi, type, __func__, \ __builtin_return_address(0)) @@ -1829,9 +1850,8 @@ static inline bool __time_to_inject(struct f2fs_sb_info *sbi, int type, atomic_inc(&ffi->inject_ops); if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { atomic_set(&ffi->inject_ops, 0); - printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", - KERN_INFO, sbi->sb->s_id, f2fs_fault_name[type], - func, parent_func); + f2fs_info_ratelimited(sbi, "inject %s in %s of %pS", + f2fs_fault_name[type], func, parent_func); return true; } return false; @@ -2325,20 +2345,6 @@ release_quota: return -ENOSPC; } -__printf(2, 3) -void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...); - -#define f2fs_err(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__) -#define f2fs_warn(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__) -#define f2fs_notice(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__) -#define f2fs_info(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__) -#define f2fs_debug(sbi, fmt, ...) \ - f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__) - #define PAGE_PRIVATE_GET_FUNC(name, flagname) \ static inline bool page_private_##name(struct page *page) \ { \ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e2c066fbc0fa..3e2a5e3b3e99 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -264,7 +264,8 @@ static match_table_t f2fs_tokens = { {Opt_err, NULL}, }; -void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) +void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, + const char *fmt, ...) { struct va_format vaf; va_list args; @@ -275,8 +276,12 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) level = printk_get_level(fmt); vaf.fmt = printk_skip_level(fmt); vaf.va = &args; - printk("%c%cF2FS-fs (%s): %pV\n", - KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + if (limit_rate) + printk_ratelimited("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + else + printk("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); va_end(args); } From 0b8eb814e05885cde53c1d56ee012a029b8413e6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 26 Jan 2024 23:19:17 +0800 Subject: [PATCH 12/60] f2fs: use f2fs_err_ratelimited() to avoid redundant logs Use f2fs_err_ratelimited() to instead f2fs_err() in f2fs_record_stop_reason() and f2fs_record_errors() to avoid redundant logs. 
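As a usage sketch, the rate-limited helpers introduced in the previous patch take the same arguments as their plain counterparts, so the conversion is mechanical:

    /* before: logged on every failure */
    f2fs_err(sbi, "f2fs_commit_super fails to record errors:%u, err:%d",
             error, err);

    /* after: suppressed by printk_ratelimited() when it repeats */
    f2fs_err_ratelimited(sbi,
            "f2fs_commit_super fails to record errors:%u, err:%d",
            error, err);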
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3e2a5e3b3e99..1b718bebfaa1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4096,7 +4096,9 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi) f2fs_up_write(&sbi->sb_lock); if (err) - f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err); + f2fs_err_ratelimited(sbi, + "f2fs_commit_super fails to record stop_reason, err:%d", + err); } void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag) @@ -4139,8 +4141,9 @@ static void f2fs_record_errors(struct f2fs_sb_info *sbi, unsigned char error) err = f2fs_commit_super(sbi, false); if (err) - f2fs_err(sbi, "f2fs_commit_super fails to record errors:%u, err:%d", - error, err); + f2fs_err_ratelimited(sbi, + "f2fs_commit_super fails to record errors:%u, err:%d", + error, err); out_unlock: f2fs_up_write(&sbi->sb_lock); } From 2f9420d3a94aeebd92db88f00f4f2f1a3bd3f6cf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Jan 2024 10:23:13 +0800 Subject: [PATCH 13/60] f2fs: compress: fix to cover f2fs_disable_compressed_file() w/ i_sem - f2fs_disable_compressed_file - check inode_has_data - f2fs_file_mmap - mkwrite - f2fs_get_block_locked : update metadata in compressed inode's disk layout - fi->i_flags &= ~F2FS_COMPR_FL - clear_inode_flag(inode, FI_COMPRESSED_FILE); we should use i_sem lock to prevent above race case. Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b4b737e43a6b..40d428636532 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4415,15 +4415,24 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - if (!f2fs_compressed_file(inode)) + f2fs_down_write(&F2FS_I(inode)->i_sem); + + if (!f2fs_compressed_file(inode)) { + f2fs_up_write(&F2FS_I(inode)->i_sem); return true; - if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) + } + if (f2fs_is_mmap_file(inode) || + (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) { + f2fs_up_write(&F2FS_I(inode)->i_sem); return false; + } fi->i_flags &= ~F2FS_COMPR_FL; stat_dec_compr_inode(inode); clear_inode_flag(inode, FI_COMPRESSED_FILE); f2fs_mark_inode_dirty_sync(inode, true); + + f2fs_up_write(&F2FS_I(inode)->i_sem); return true; } From f289e95fffd5e6b59d175cb1e653db0daad7e456 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Thu, 18 Jan 2024 13:48:31 +0800 Subject: [PATCH 14/60] f2fs: compress: remove some redundant codes in f2fs_cache_compressed_page Just remove some redundant codes, no logic change. 
Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 0fd839358c15..3dc488ce882b 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1889,12 +1889,8 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, set_page_private_data(cpage, ino); - if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) - goto out; - memcpy(page_address(cpage), page_address(page), PAGE_SIZE); SetPageUptodate(cpage); -out: f2fs_put_page(cpage, 1); } From 8e9c1a349b5e227f687fa6f047e1a785b07371ea Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Wed, 17 Jan 2024 15:59:58 +0800 Subject: [PATCH 15/60] f2fs: use IS_INODE replace IS_DNODE in f2fs_flush_inline_data Now IS_DNODE is used in f2fs_flush_inline_data and it has some problems: 1. Just only inodes may include inline data,not all direct nodes 2. When system IO is busy, it is inefficient to lock a direct node page but not an inode page. Besides, if this direct node page is being locked by others for IO, f2fs_flush_inline_data will be blocked here, which will affects the checkpoint process, this is unreasonable. So IS_INODE should be used in f2fs_flush_inline_data. Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9b546fd21010..1d898a16f05a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1919,7 +1919,7 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) for (i = 0; i < nr_folios; i++) { struct page *page = &fbatch.folios[i]->page; - if (!IS_DNODE(page)) + if (!IS_INODE(page)) continue; lock_page(page); From 21ec68234826b1b54ab980a8df6e33c74cfbee58 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Jan 2024 22:49:15 +0800 Subject: [PATCH 16/60] f2fs: fix to avoid potential panic during recovery During recovery, if FAULT_BLOCK is on, it is possible that f2fs_reserve_new_block() will return -ENOSPC during recovery, then it may trigger panic. Also, if fault injection rate is 1 and only FAULT_BLOCK fault type is on, it may encounter deadloop in loop of block reservation. Let's change as below to fix these issues: - remove bug_on() to avoid panic. - limit the loop count of block reservation to avoid potential deadloop. 
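The retry policy is small enough to restate here (see f2fs_reserve_new_block_retry() in the hunk below): with fault injection the reservation is retried a bounded number of times instead of looping until it happens to succeed, and a genuine -ENOSPC no longer trips a BUG():

    /* DEFAULT_FAILURE_RETRY_COUNT is 8 under CONFIG_F2FS_FAULT_INJECTION,
     * otherwise 1, so the normal build keeps the single-attempt behaviour
     */
    for (i = DEFAULT_FAILURE_RETRY_COUNT; i > 0; i--) {
            err = f2fs_reserve_new_block(dn);
            if (!err)
                    break;
    }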
Fixes: 956fa1ddc132 ("f2fs: fix to check return value of f2fs_reserve_new_block()") Reported-by: Zhiguo Niu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/recovery.c | 33 ++++++++++++++++----------------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 40d428636532..543898482f8b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -76,6 +76,11 @@ struct f2fs_fault_info { extern const char *f2fs_fault_name[FAULT_MAX]; #define IS_FAULT_SET(fi, type) ((fi)->inject_type & BIT(type)) + +/* maximum retry count for injected failure */ +#define DEFAULT_FAILURE_RETRY_COUNT 8 +#else +#define DEFAULT_FAILURE_RETRY_COUNT 1 #endif /* diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index d0f24ccbd1ac..aad1d1a9b3d6 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -611,6 +611,19 @@ truncate_out: return 0; } +static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn) +{ + int i, err = 0; + + for (i = DEFAULT_FAILURE_RETRY_COUNT; i > 0; i--) { + err = f2fs_reserve_new_block(dn); + if (!err) + break; + } + + return err; +} + static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page) { @@ -712,14 +725,8 @@ retry_dn: */ if (dest == NEW_ADDR) { f2fs_truncate_data_blocks_range(&dn, 1); - do { - err = f2fs_reserve_new_block(&dn); - if (err == -ENOSPC) { - f2fs_bug_on(sbi, 1); - break; - } - } while (err && - IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)); + + err = f2fs_reserve_new_block_retry(&dn); if (err) goto err; continue; @@ -727,16 +734,8 @@ retry_dn: /* dest is valid block, try to recover from src to dest */ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { - if (src == NULL_ADDR) { - do { - err = f2fs_reserve_new_block(&dn); - if (err == -ENOSPC) { - f2fs_bug_on(sbi, 1); - break; - } - } while (err && - IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)); + err = f2fs_reserve_new_block_retry(&dn); if (err) goto err; } From 87161a2b0aed9e9b614bbf6fe8697ad560ceb0cb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 6 Feb 2024 11:21:00 -0800 Subject: [PATCH 17/60] f2fs: deprecate io_bits Let's deprecate an unused io_bits feature to save CPU cycles and memory. Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 2 - fs/f2fs/data.c | 73 +------------------------ fs/f2fs/f2fs.h | 25 ++------- fs/f2fs/file.c | 2 - fs/f2fs/gc.c | 10 +--- fs/f2fs/segment.c | 9 +-- fs/f2fs/super.c | 88 +----------------------------- include/linux/f2fs_fs.h | 6 -- 8 files changed, 10 insertions(+), 205 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 32cbfa864f38..9ac5083dae8e 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -229,8 +229,6 @@ mode=%s Control block allocation mode which supports "adaptive" option for more randomness. Please, use these options for your experiments and we strongly recommend to re-format the filesystem after using these options. -io_bits=%u Set the bit size of write IO requests. It should be set - with "mode=lfs". usrquota Enable plain user disk quota accounting. grpquota Enable plain group disk quota accounting. prjquota Enable plain project quota accounting. 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 05158f89ef32..828c797cd47c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -338,17 +338,6 @@ static void f2fs_write_end_io(struct bio *bio) struct page *page = bvec->bv_page; enum count_type type = WB_DATA_TYPE(page, false); - if (page_private_dummy(page)) { - clear_page_private_dummy(page); - unlock_page(page); - mempool_free(page, sbi->write_io_dummy); - - if (unlikely(bio->bi_status)) - f2fs_stop_checkpoint(sbi, true, - STOP_CP_REASON_WRITE_FAIL); - continue; - } - fscrypt_finalize_bounce_page(&page); #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -522,50 +511,13 @@ void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio, submit_bio(bio); } -static void f2fs_align_write_bio(struct f2fs_sb_info *sbi, struct bio *bio) -{ - unsigned int start = - (bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS) % F2FS_IO_SIZE(sbi); - - if (start == 0) - return; - - /* fill dummy pages */ - for (; start < F2FS_IO_SIZE(sbi); start++) { - struct page *page = - mempool_alloc(sbi->write_io_dummy, - GFP_NOIO | __GFP_NOFAIL); - f2fs_bug_on(sbi, !page); - - lock_page(page); - - zero_user_segment(page, 0, PAGE_SIZE); - set_page_private_dummy(page); - - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) - f2fs_bug_on(sbi, 1); - } -} - static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { WARN_ON_ONCE(is_read_io(bio_op(bio))); - if (type == DATA || type == NODE) { - if (f2fs_lfs_mode(sbi) && current->plug) - blk_finish_plug(current->plug); - - if (F2FS_IO_ALIGNED(sbi)) { - f2fs_align_write_bio(sbi, bio); - /* - * In the NODE case, we lose next block address chain. - * So, we need to do checkpoint in f2fs_sync_file. - */ - if (type == NODE) - set_sbi_flag(sbi, SBI_NEED_CP); - } - } + if (f2fs_lfs_mode(sbi) && current->plug && PAGE_TYPE_ON_MAIN(type)) + blk_finish_plug(current->plug); trace_f2fs_submit_write_bio(sbi->sb, type, bio); iostat_update_submit_ctx(bio, type); @@ -794,16 +746,6 @@ static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, block_t last_blkaddr, block_t cur_blkaddr) { - if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) { - unsigned int filled_blocks = - F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size); - unsigned int io_size = F2FS_IO_SIZE(sbi); - unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt; - - /* IOs in bio is aligned and left space of vectors is not enough */ - if (!(filled_blocks % io_size) && left_vecs < io_size) - return false; - } if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr)) return false; return io_type_is_mergeable(io, fio); @@ -1055,14 +997,6 @@ next: __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { - if (F2FS_IO_ALIGNED(sbi) && - (fio->type == DATA || fio->type == NODE) && - fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { - dec_page_count(sbi, WB_DATA_TYPE(bio_page, - fio->compressed_page)); - fio->retry = 1; - goto skip; - } io->bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, bio_page->index, fio, GFP_NOIO); @@ -1092,7 +1026,6 @@ alloc_new: __submit_merged_bio(io); } #endif -skip: if (fio->in_list) goto next; out: @@ -2669,8 +2602,6 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) if (fio) { if (page_private_gcing(fio->page)) return true; - if (page_private_dummy(fio->page)) - return true; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) return true; diff --git a/fs/f2fs/f2fs.h 
b/fs/f2fs/f2fs.h index 543898482f8b..4c52136cbc10 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -148,7 +148,6 @@ struct f2fs_rwsem { struct f2fs_mount_info { unsigned int opt; - int write_io_size_bits; /* Write IO size bits */ block_t root_reserved_blocks; /* root reserved blocks */ kuid_t s_resuid; /* reserved blocks for uid */ kgid_t s_resgid; /* reserved blocks for gid */ @@ -1117,6 +1116,7 @@ enum count_type { * ... Only can be used with META. */ #define PAGE_TYPE_OF_BIO(type) ((type) > META ? META : (type)) +#define PAGE_TYPE_ON_MAIN(type) ((type) == DATA || (type) == NODE) enum page_type { DATA = 0, NODE = 1, /* should not change this */ @@ -1211,7 +1211,6 @@ struct f2fs_io_info { unsigned int submitted:1; /* indicate IO submission */ unsigned int in_list:1; /* indicate fio is in io_list */ unsigned int is_por:1; /* indicate IO is from recovery or not */ - unsigned int retry:1; /* need to reallocate block address */ unsigned int encrypted:1; /* indicate file is encrypted */ unsigned int post_read:1; /* require post read */ enum iostat_type io_type; /* io type */ @@ -1413,18 +1412,16 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr); * Layout A: lowest bit should be 1 * | bit0 = 1 | bit1 | bit2 | ... | bit MAX | private data .... | * bit 0 PAGE_PRIVATE_NOT_POINTER - * bit 1 PAGE_PRIVATE_DUMMY_WRITE - * bit 2 PAGE_PRIVATE_ONGOING_MIGRATION - * bit 3 PAGE_PRIVATE_INLINE_INODE - * bit 4 PAGE_PRIVATE_REF_RESOURCE - * bit 5- f2fs private data + * bit 1 PAGE_PRIVATE_ONGOING_MIGRATION + * bit 2 PAGE_PRIVATE_INLINE_INODE + * bit 3 PAGE_PRIVATE_REF_RESOURCE + * bit 4- f2fs private data * * Layout B: lowest bit should be 0 * page.private is a wrapped pointer. */ enum { PAGE_PRIVATE_NOT_POINTER, /* private contains non-pointer data */ - PAGE_PRIVATE_DUMMY_WRITE, /* data page for padding aligned IO */ PAGE_PRIVATE_ONGOING_MIGRATION, /* data page which is on-going migrating */ PAGE_PRIVATE_INLINE_INODE, /* inode page contains inline data */ PAGE_PRIVATE_REF_RESOURCE, /* dirty page has referenced resources */ @@ -1571,7 +1568,6 @@ struct f2fs_sb_info { struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ /* keep migration IO order for LFS mode */ struct f2fs_rwsem io_order_lock; - mempool_t *write_io_dummy; /* Dummy pages */ pgoff_t page_eio_ofs[NR_PAGE_TYPE]; /* EIO page offset */ int page_eio_cnt[NR_PAGE_TYPE]; /* EIO count */ @@ -2307,10 +2303,6 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, true)) avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; - if (F2FS_IO_ALIGNED(sbi)) - avail_user_block_count -= sbi->blocks_per_seg * - SM_I(sbi)->additional_reserved_segments; - if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (avail_user_block_count > sbi->unusable_block_count) avail_user_block_count -= sbi->unusable_block_count; @@ -2378,17 +2370,14 @@ static inline void clear_page_private_##name(struct page *page) \ PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER); PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION); -PAGE_PRIVATE_GET_FUNC(dummy, DUMMY_WRITE); PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION); -PAGE_PRIVATE_SET_FUNC(dummy, DUMMY_WRITE); PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); -PAGE_PRIVATE_CLEAR_FUNC(dummy, DUMMY_WRITE); static 
inline unsigned long get_page_private_data(struct page *page) { @@ -2644,10 +2633,6 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, false)) valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; - if (F2FS_IO_ALIGNED(sbi)) - valid_block_count += sbi->blocks_per_seg * - SM_I(sbi)->additional_reserved_segments; - user_block_count = sbi->user_block_count; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) user_block_count -= sbi->unusable_block_count; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 25b119cf3499..c6cd9474ba2d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -822,8 +822,6 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw) */ if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE)) return true; - if (f2fs_lfs_mode(sbi) && rw == WRITE && F2FS_IO_ALIGNED(sbi)) - return true; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) return true; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a079eebfb080..6899f434ad68 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1184,7 +1184,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index) .op_flags = 0, .encrypted_page = NULL, .in_list = 0, - .retry = 0, }; int err; @@ -1273,7 +1272,6 @@ static int move_data_block(struct inode *inode, block_t bidx, .op_flags = 0, .encrypted_page = NULL, .in_list = 0, - .retry = 0, }; struct dnode_of_data dn; struct f2fs_summary sum; @@ -1393,18 +1391,12 @@ static int move_data_block(struct inode *inode, block_t bidx, fio.op_flags = REQ_SYNC; fio.new_blkaddr = newaddr; f2fs_submit_page_write(&fio); - if (fio.retry) { - err = -EAGAIN; - if (PageWriteback(fio.encrypted_page)) - end_page_writeback(fio.encrypted_page); - goto put_page_out; - } f2fs_update_iostat(fio.sbi, NULL, FS_GC_DATA_IO, F2FS_BLKSIZE); f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, FI_APPEND_WRITE); -put_page_out: + f2fs_put_page(fio.encrypted_page, 1); recover_block: if (err) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7901ede58113..e5759813276a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3507,9 +3507,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (fio) { struct f2fs_bio_info *io; - if (F2FS_IO_ALIGNED(sbi)) - fio->retry = 0; - INIT_LIST_HEAD(&fio->list); fio->in_list = 1; io = sbi->write_io[fio->type] + fio->temp; @@ -3557,7 +3554,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) if (keep_order) f2fs_down_read(&fio->sbi->io_order_lock); -reallocate: + f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio); if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) @@ -3565,10 +3562,6 @@ reallocate: /* writeout dirty page into bdev */ f2fs_submit_page_write(fio); - if (fio->retry) { - fio->old_blkaddr = fio->new_blkaddr; - goto reallocate; - } f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1b718bebfaa1..f1516fd5088a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -138,7 +138,6 @@ enum { Opt_resgid, Opt_resuid, Opt_mode, - Opt_io_size_bits, Opt_fault_injection, Opt_fault_type, Opt_lazytime, @@ -217,7 +216,6 @@ static match_table_t f2fs_tokens = { {Opt_resgid, "resgid=%u"}, {Opt_resuid, "resuid=%u"}, {Opt_mode, "mode=%s"}, - {Opt_io_size_bits, "io_bits=%u"}, {Opt_fault_injection, "fault_injection=%u"}, {Opt_fault_type, "fault_type=%u"}, {Opt_lazytime, "lazytime"}, @@ -349,46 +347,6 @@ static inline void limit_reserve_root(struct 
f2fs_sb_info *sbi) F2FS_OPTION(sbi).s_resgid)); } -static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi) -{ - unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec; - unsigned int avg_vblocks; - unsigned int wanted_reserved_segments; - block_t avail_user_block_count; - - if (!F2FS_IO_ALIGNED(sbi)) - return 0; - - /* average valid block count in section in worst case */ - avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi); - - /* - * we need enough free space when migrating one section in worst case - */ - wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) * - reserved_segments(sbi); - wanted_reserved_segments -= reserved_segments(sbi); - - avail_user_block_count = sbi->user_block_count - - sbi->current_reserved_blocks - - F2FS_OPTION(sbi).root_reserved_blocks; - - if (wanted_reserved_segments * sbi->blocks_per_seg > - avail_user_block_count) { - f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u", - wanted_reserved_segments, - avail_user_block_count >> sbi->log_blocks_per_seg); - return -ENOSPC; - } - - SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments; - - f2fs_info(sbi, "IO align feature needs additional reserved segment: %u", - wanted_reserved_segments); - - return 0; -} - static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi) { if (!F2FS_OPTION(sbi).unusable_cap_perc) @@ -919,16 +877,6 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) } kfree(name); break; - case Opt_io_size_bits: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) { - f2fs_warn(sbi, "Not support %ld, larger than %d", - BIT(arg), BIO_MAX_VECS); - return -EINVAL; - } - F2FS_OPTION(sbi).write_io_size_bits = arg; - break; #ifdef CONFIG_F2FS_FAULT_INJECTION case Opt_fault_injection: if (args->from && match_int(args, &arg)) @@ -1398,12 +1346,6 @@ default_check: } #endif - if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) { - f2fs_err(sbi, "Should set mode=lfs with %luKB-sized IO", - F2FS_IO_SIZE_KB(sbi)); - return -EINVAL; - } - if (test_opt(sbi, INLINE_XATTR_SIZE)) { int min_size, max_size; @@ -1724,7 +1666,6 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_page_array_cache(sbi); f2fs_destroy_xattr_caches(sbi); - mempool_destroy(sbi->write_io_dummy); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) kfree(F2FS_OPTION(sbi).s_qf_names[i]); @@ -2084,9 +2025,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) F2FS_OPTION(sbi).s_resuid), from_kgid_munged(&init_user_ns, F2FS_OPTION(sbi).s_resgid)); - if (F2FS_IO_SIZE_BITS(sbi)) - seq_printf(seq, ",io_bits=%u", - F2FS_OPTION(sbi).write_io_size_bits); #ifdef CONFIG_F2FS_FAULT_INJECTION if (test_opt(sbi, FAULT_INJECTION)) { seq_printf(seq, ",fault_injection=%u", @@ -2338,7 +2276,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE); bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE); bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT); - bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool no_atgc = !test_opt(sbi, ATGC); bool no_discard = !test_opt(sbi, DISCARD); bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE); @@ -2446,12 +2383,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) { - err = -EINVAL; - f2fs_warn(sbi, "switch io_bits option is not 
allowed"); - goto restore_opts; - } - if (no_compress_cache == !!test_opt(sbi, COMPRESS_CACHE)) { err = -EINVAL; f2fs_warn(sbi, "switch compress_cache option is not allowed"); @@ -4314,8 +4245,6 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) FDEV(i).total_segments, FDEV(i).start_blk, FDEV(i).end_blk); } - f2fs_info(sbi, - "IO Block Size: %8ld KB", F2FS_IO_SIZE_KB(sbi)); return 0; } @@ -4528,19 +4457,10 @@ try_onemore: if (err) goto free_iostat; - if (F2FS_IO_ALIGNED(sbi)) { - sbi->write_io_dummy = - mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0); - if (!sbi->write_io_dummy) { - err = -ENOMEM; - goto free_percpu; - } - } - /* init per sbi slab cache */ err = f2fs_init_xattr_caches(sbi); if (err) - goto free_io_dummy; + goto free_percpu; err = f2fs_init_page_array_cache(sbi); if (err) goto free_xattr_cache; @@ -4628,10 +4548,6 @@ try_onemore: goto free_nm; } - err = adjust_reserved_segment(sbi); - if (err) - goto free_nm; - /* For write statistics */ sbi->sectors_written_start = f2fs_get_sectors_written(sbi); @@ -4862,8 +4778,6 @@ free_page_array_cache: f2fs_destroy_page_array_cache(sbi); free_xattr_cache: f2fs_destroy_xattr_caches(sbi); -free_io_dummy: - mempool_destroy(sbi->write_io_dummy); free_percpu: destroy_percpu_info(sbi); free_iostat: diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 053137a0fe45..9b69c50255b2 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -40,12 +40,6 @@ #define F2FS_ENC_UTF8_12_1 1 -#define F2FS_IO_SIZE(sbi) BIT(F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ -#define F2FS_IO_SIZE_KB(sbi) BIT(F2FS_OPTION(sbi).write_io_size_bits + 2) /* KB */ -#define F2FS_IO_SIZE_BITS(sbi) (F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */ -#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1) -#define F2FS_IO_ALIGNED(sbi) (F2FS_IO_SIZE(sbi) > 1) - /* This flag is used by node and meta inodes, and by recovery */ #define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) From a60108f7dfb5867da1ad9c777d2fbbe47e4dbdd7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 6 Feb 2024 13:56:27 -0800 Subject: [PATCH 18/60] f2fs: use BLKS_PER_SEG, BLKS_PER_SEC, and SEGS_PER_SEC No functional change. 
Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 10 ++-- fs/f2fs/debug.c | 6 +-- fs/f2fs/f2fs.h | 21 +++++--- fs/f2fs/file.c | 16 +++--- fs/f2fs/gc.c | 40 +++++++-------- fs/f2fs/node.c | 4 +- fs/f2fs/node.h | 4 +- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 114 +++++++++++++++++++++---------------------- fs/f2fs/segment.h | 44 ++++++++--------- fs/f2fs/super.c | 8 +-- fs/f2fs/sysfs.c | 6 +-- 12 files changed, 137 insertions(+), 138 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b85820e70f5e..a09a9609e228 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -900,7 +900,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count); - if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) { + if (cp_blocks > BLKS_PER_SEG(sbi) || cp_blocks <= F2FS_CP_PACKS) { f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", le32_to_cpu(cp_block->cp_pack_total_block_count)); goto invalid_cp; @@ -1335,7 +1335,7 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (cpc->reason & CP_UMOUNT) { if (le32_to_cpu(ckpt->cp_pack_total_block_count) + - NM_I(sbi)->nat_bits_blocks > sbi->blocks_per_seg) { + NM_I(sbi)->nat_bits_blocks > BLKS_PER_SEG(sbi)) { clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG); f2fs_notice(sbi, "Disable nat_bits due to no space"); } else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) && @@ -1538,7 +1538,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) cp_ver |= ((__u64)crc32 << 32); *(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver); - blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks; + blk = start_blk + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) f2fs_update_meta_page(sbi, nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), blk + i); @@ -1741,9 +1741,9 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi) im->ino_num = 0; } - sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - + sbi->max_orphans = (BLKS_PER_SEG(sbi) - F2FS_CP_PACKS - NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) * - F2FS_ORPHANS_PER_BLOCK; + F2FS_ORPHANS_PER_BLOCK; } int __init f2fs_create_checkpoint_caches(void) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index fdbf994f1271..0d02224b99b7 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -41,7 +41,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi) total_vblocks = 0; blks_per_sec = CAP_BLKS_PER_SEC(sbi); hblks_per_sec = blks_per_sec / 2; - for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { + for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { vblocks = get_valid_blocks(sbi, segno, true); dist = abs(vblocks - hblks_per_sec); bimodal += dist * dist; @@ -135,7 +135,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->cur_ckpt_time = sbi->cprc_info.cur_time; si->peak_ckpt_time = sbi->cprc_info.peak_time; spin_unlock(&sbi->cprc_info.stat_lock); - si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; + si->total_count = (int)sbi->user_block_count / BLKS_PER_SEG(sbi); si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); si->valid_count = valid_user_blocks(sbi); @@ -208,7 +208,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) if (!blks) continue; - if (blks == sbi->blocks_per_seg) + if (blks == BLKS_PER_SEG(sbi)) si->full_seg[type]++; else si->dirty_seg[type]++; diff --git 
a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4c52136cbc10..50e7890cc6a5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1813,6 +1813,14 @@ struct f2fs_sb_info { #endif }; +/* Definitions to access f2fs_sb_info */ +#define BLKS_PER_SEG(sbi) \ + ((sbi)->blocks_per_seg) +#define BLKS_PER_SEC(sbi) \ + ((sbi)->segs_per_sec << (sbi)->log_blocks_per_seg) +#define SEGS_PER_SEC(sbi) \ + ((sbi)->segs_per_sec) + __printf(3, 4) void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, const char *fmt, ...); @@ -2511,11 +2519,8 @@ static inline int get_dirty_pages(struct inode *inode) static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) { - unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; - unsigned int segs = (get_pages(sbi, block_type) + pages_per_sec - 1) >> - sbi->log_blocks_per_seg; - - return segs / sbi->segs_per_sec; + return div_u64(get_pages(sbi, block_type) + BLKS_PER_SEC(sbi) - 1, + BLKS_PER_SEC(sbi)); } static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) @@ -2579,7 +2584,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); if (sbi->cur_cp_pack == 2) - start_addr += sbi->blocks_per_seg; + start_addr += BLKS_PER_SEG(sbi); return start_addr; } @@ -2588,7 +2593,7 @@ static inline block_t __start_cp_next_addr(struct f2fs_sb_info *sbi) block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); if (sbi->cur_cp_pack == 1) - start_addr += sbi->blocks_per_seg; + start_addr += BLKS_PER_SEG(sbi); return start_addr; } @@ -3458,7 +3463,7 @@ static inline __le32 *get_dnode_addr(struct inode *inode, sizeof((f2fs_inode)->field)) \ <= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \ -#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) +#define __is_large_section(sbi) (SEGS_PER_SEC(sbi) > 1) #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c6cd9474ba2d..767d16c74bb6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2580,7 +2580,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, .m_may_create = false }; struct extent_info ei = {}; pgoff_t pg_start, pg_end, next_pgofs; - unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; block_t blk_end = 0; bool fragmented = false; @@ -2689,7 +2688,8 @@ do_map: set_inode_flag(inode, FI_SKIP_WRITES); idx = map.m_lblk; - while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { + while (idx < map.m_lblk + map.m_len && + cnt < BLKS_PER_SEG(sbi)) { struct page *page; page = f2fs_get_lock_data_page(inode, idx, true); @@ -2709,7 +2709,7 @@ do_map: map.m_lblk = idx; check: - if (map.m_lblk < pg_end && cnt < blk_per_seg) + if (map.m_lblk < pg_end && cnt < BLKS_PER_SEG(sbi)) goto do_map; clear_inode_flag(inode, FI_SKIP_WRITES); @@ -2978,8 +2978,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || __is_large_section(sbi)) { - f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1", - range.dev_num, sbi->s_ndevs, sbi->segs_per_sec); + f2fs_warn(sbi, "Can't flush %u in %d for SEGS_PER_SEC %u != 1", + range.dev_num, sbi->s_ndevs, SEGS_PER_SEC(sbi)); return -EINVAL; } @@ -4081,7 +4081,6 @@ static int f2fs_ioc_decompress_file(struct file *filp) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); pgoff_t page_idx = 0, last_idx; - unsigned int blk_per_seg = sbi->blocks_per_seg; int 
cluster_size = fi->i_cluster_size; int count, ret; @@ -4125,7 +4124,7 @@ static int f2fs_ioc_decompress_file(struct file *filp) if (ret < 0) break; - if (get_dirty_pages(inode) >= blk_per_seg) { + if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { ret = filemap_fdatawrite(inode->i_mapping); if (ret < 0) break; @@ -4160,7 +4159,6 @@ static int f2fs_ioc_compress_file(struct file *filp) struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t page_idx = 0, last_idx; - unsigned int blk_per_seg = sbi->blocks_per_seg; int cluster_size = F2FS_I(inode)->i_cluster_size; int count, ret; @@ -4203,7 +4201,7 @@ static int f2fs_ioc_compress_file(struct file *filp) if (ret < 0) break; - if (get_dirty_pages(inode) >= blk_per_seg) { + if (get_dirty_pages(inode) >= BLKS_PER_SEG(sbi)) { ret = filemap_fdatawrite(inode->i_mapping); if (ret < 0) break; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6899f434ad68..46d67d7dc913 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -259,7 +259,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->ofs_unit = 1; } else { p->gc_mode = select_gc_type(sbi, gc_type); - p->ofs_unit = sbi->segs_per_sec; + p->ofs_unit = SEGS_PER_SEC(sbi); if (__is_large_section(sbi)) { p->dirty_bitmap = dirty_i->dirty_secmap; p->max_search = count_bits(p->dirty_bitmap, @@ -282,7 +282,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, /* let's select beginning hot/small space first in no_heap mode*/ if (f2fs_need_rand_seg(sbi)) - p->offset = get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec); + p->offset = get_random_u32_below(MAIN_SECS(sbi) * + SEGS_PER_SEC(sbi)); else if (test_opt(sbi, NOHEAP) && (type == CURSEG_HOT_DATA || IS_NODESEG(type))) p->offset = 0; @@ -295,13 +296,13 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, { /* SSR allocates in a segment unit */ if (p->alloc_mode == SSR) - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); else if (p->alloc_mode == AT_SSR) return UINT_MAX; /* LFS */ if (p->gc_mode == GC_GREEDY) - return 2 * sbi->blocks_per_seg * p->ofs_unit; + return 2 * BLKS_PER_SEG(sbi) * p->ofs_unit; else if (p->gc_mode == GC_CB) return UINT_MAX; else if (p->gc_mode == GC_AT) @@ -496,9 +497,9 @@ static void add_victim_entry(struct f2fs_sb_info *sbi, return; } - for (i = 0; i < sbi->segs_per_sec; i++) + for (i = 0; i < SEGS_PER_SEC(sbi); i++) mtime += get_seg_entry(sbi, start + i)->mtime; - mtime = div_u64(mtime, sbi->segs_per_sec); + mtime = div_u64(mtime, SEGS_PER_SEC(sbi)); /* Handle if the system time has changed by the user */ if (mtime < sit_i->min_mtime) @@ -599,7 +600,6 @@ static void atssr_lookup_victim(struct f2fs_sb_info *sbi, unsigned long long age; unsigned long long max_mtime = sit_i->dirty_max_mtime; unsigned long long min_mtime = sit_i->dirty_min_mtime; - unsigned int seg_blocks = sbi->blocks_per_seg; unsigned int vblocks; unsigned int dirty_threshold = max(am->max_candidate_count, am->candidate_ratio * @@ -629,7 +629,7 @@ next_node: f2fs_bug_on(sbi, !vblocks); /* rare case */ - if (vblocks == seg_blocks) + if (vblocks == BLKS_PER_SEG(sbi)) goto skip_node; iter++; @@ -755,7 +755,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, int ret = 0; mutex_lock(&dirty_i->seglist_lock); - last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec; + last_segment = MAIN_SECS(sbi) * SEGS_PER_SEC(sbi); p.alloc_mode = alloc_mode; p.age = age; @@ -896,7 +896,7 @@ next: else sm->last_victim[p.gc_mode] = segno + p.ofs_unit; sm->last_victim[p.gc_mode] %= - 
(MAIN_SECS(sbi) * sbi->segs_per_sec); + (MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); break; } } @@ -1670,7 +1670,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, struct f2fs_summary_block *sum; struct blk_plug plug; unsigned int segno = start_segno; - unsigned int end_segno = start_segno + sbi->segs_per_sec; + unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi); int seg_freed = 0, migrated = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; @@ -1678,7 +1678,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, int submitted = 0; if (__is_large_section(sbi)) - end_segno = rounddown(end_segno, sbi->segs_per_sec); + end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi)); /* * zone-capacity can be less than zone-size in zoned devices, @@ -1686,7 +1686,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, * calculate the end segno in the zone which can be garbage collected */ if (f2fs_sb_has_blkzoned(sbi)) - end_segno -= sbi->segs_per_sec - + end_segno -= SEGS_PER_SEC(sbi) - f2fs_usable_segs_in_sec(sbi, segno); sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type); @@ -1986,7 +1986,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, /* Force block allocation for GC */ MAIN_SECS(sbi) -= secs; - start = MAIN_SECS(sbi) * sbi->segs_per_sec; + start = MAIN_SECS(sbi) * SEGS_PER_SEC(sbi); end = MAIN_SEGS(sbi) - 1; mutex_lock(&DIRTY_I(sbi)->seglist_lock); @@ -2004,7 +2004,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, f2fs_allocate_segment_for_resize(sbi, type, start, end); /* do GC to move out valid blocks in the range */ - for (segno = start; segno <= end; segno += sbi->segs_per_sec) { + for (segno = start; segno <= end; segno += SEGS_PER_SEC(sbi)) { struct gc_inode_list gc_list = { .ilist = LIST_HEAD_INIT(gc_list.ilist), .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), @@ -2048,7 +2048,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) int segment_count; int segment_count_main; long long block_count; - int segs = secs * sbi->segs_per_sec; + int segs = secs * SEGS_PER_SEC(sbi); f2fs_down_write(&sbi->sb_lock); @@ -2061,7 +2061,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) raw_sb->segment_count = cpu_to_le32(segment_count + segs); raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); raw_sb->block_count = cpu_to_le64(block_count + - (long long)segs * sbi->blocks_per_seg); + (long long)(segs << sbi->log_blocks_per_seg)); if (f2fs_is_multi_device(sbi)) { int last_dev = sbi->s_ndevs - 1; int dev_segs = @@ -2076,8 +2076,8 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) { - int segs = secs * sbi->segs_per_sec; - long long blks = (long long)segs * sbi->blocks_per_seg; + int segs = secs * SEGS_PER_SEC(sbi); + long long blks = (long long)segs << sbi->log_blocks_per_seg; long long user_block_count = le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); @@ -2119,7 +2119,7 @@ int f2fs_resize_fs(struct file *filp, __u64 block_count) int last_dev = sbi->s_ndevs - 1; __u64 last_segs = FDEV(last_dev).total_segments; - if (block_count + last_segs * sbi->blocks_per_seg <= + if (block_count + (last_segs << sbi->log_blocks_per_seg) <= old_block_count) return -EINVAL; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1d898a16f05a..51241996b9ec 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2841,7 +2841,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, int i, 
idx, last_offset, nrpages; /* scan the node segment */ - last_offset = sbi->blocks_per_seg; + last_offset = BLKS_PER_SEG(sbi); addr = START_BLOCK(sbi, segno); sum_entry = &sum->entries[0]; @@ -3158,7 +3158,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) return 0; - nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg - + nat_bits_addr = __start_cp_addr(sbi) + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) { struct page *page; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 5bd16a95eef8..6aea13024ac1 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -208,10 +208,10 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) block_addr = (pgoff_t)(nm_i->nat_blkaddr + (block_off << 1) - - (block_off & (sbi->blocks_per_seg - 1))); + (block_off & (BLKS_PER_SEG(sbi) - 1))); if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) - block_addr += sbi->blocks_per_seg; + block_addr += BLKS_PER_SEG(sbi); return block_addr; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index aad1d1a9b3d6..b3baec666afe 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -354,7 +354,7 @@ static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi, if (blkaddr + 1 == next_blkaddr) ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS, ra_blocks * 2); - else if (next_blkaddr % sbi->blocks_per_seg) + else if (next_blkaddr % BLKS_PER_SEG(sbi)) ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS, ra_blocks / 2); return ra_blocks; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e5759813276a..1518f1287c28 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -448,8 +448,8 @@ static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi) unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES); unsigned int meta = get_pages(sbi, F2FS_DIRTY_META); unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA); - unsigned int threshold = sbi->blocks_per_seg * factor * - DEFAULT_DIRTY_THRESHOLD; + unsigned int threshold = (factor * DEFAULT_DIRTY_THRESHOLD) << + sbi->log_blocks_per_seg; unsigned int global_threshold = threshold * 3 / 2; if (dents >= threshold || qdata >= threshold || @@ -1134,8 +1134,7 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, struct seg_entry *sentry; unsigned int segno; block_t blk = start; - unsigned long offset, size, max_blocks = sbi->blocks_per_seg; - unsigned long *map; + unsigned long offset, size, *map; while (blk < end) { segno = GET_SEGNO(sbi, blk); @@ -1145,7 +1144,7 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, if (end < START_BLOCK(sbi, segno + 1)) size = GET_BLKOFF_FROM_SEG0(sbi, end); else - size = max_blocks; + size = BLKS_PER_SEG(sbi); map = (unsigned long *)(sentry->cur_valid_map); offset = __find_rev_next_bit(map, size, offset); f2fs_bug_on(sbi, offset != size); @@ -2044,7 +2043,6 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, bool check_only) { int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); - int max_blocks = sbi->blocks_per_seg; struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); unsigned long *cur_map = (unsigned long *)se->cur_valid_map; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; @@ -2056,8 +2054,9 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; - if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) || - 
!f2fs_block_unit_discard(sbi)) + if (se->valid_blocks == BLKS_PER_SEG(sbi) || + !f2fs_hw_support_discard(sbi) || + !f2fs_block_unit_discard(sbi)) return false; if (!force) { @@ -2074,13 +2073,14 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, while (force || SM_I(sbi)->dcc_info->nr_discards <= SM_I(sbi)->dcc_info->max_discards) { - start = __find_rev_next_bit(dmap, max_blocks, end + 1); - if (start >= max_blocks) + start = __find_rev_next_bit(dmap, BLKS_PER_SEG(sbi), end + 1); + if (start >= BLKS_PER_SEG(sbi)) break; - end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); - if (force && start && end != max_blocks - && (end - start) < cpc->trim_minlen) + end = __find_rev_next_zero_bit(dmap, + BLKS_PER_SEG(sbi), start + 1); + if (force && start && end != BLKS_PER_SEG(sbi) && + (end - start) < cpc->trim_minlen) continue; if (check_only) @@ -2162,8 +2162,8 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, start + 1); if (section_alignment) { - start = rounddown(start, sbi->segs_per_sec); - end = roundup(end, sbi->segs_per_sec); + start = rounddown(start, SEGS_PER_SEC(sbi)); + end = roundup(end, SEGS_PER_SEC(sbi)); } for (i = start; i < end; i++) { @@ -2191,9 +2191,9 @@ next: if (!IS_CURSEC(sbi, secno) && !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), - sbi->segs_per_sec << sbi->log_blocks_per_seg); + BLKS_PER_SEC(sbi)); - start = start_segno + sbi->segs_per_sec; + start = start_segno + SEGS_PER_SEC(sbi); if (start < end) goto next; else @@ -2212,7 +2212,7 @@ next: find_next: if (is_valid) { next_pos = find_next_zero_bit_le(entry->discard_map, - sbi->blocks_per_seg, cur_pos); + BLKS_PER_SEG(sbi), cur_pos); len = next_pos - cur_pos; if (f2fs_sb_has_blkzoned(sbi) || @@ -2224,13 +2224,13 @@ find_next: total_len += len; } else { next_pos = find_next_bit_le(entry->discard_map, - sbi->blocks_per_seg, cur_pos); + BLKS_PER_SEG(sbi), cur_pos); } skip: cur_pos = next_pos; is_valid = !is_valid; - if (cur_pos < sbi->blocks_per_seg) + if (cur_pos < BLKS_PER_SEG(sbi)) goto find_next; release_discard_addr(entry); @@ -2279,7 +2279,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY; dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE; if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT) - dcc->discard_granularity = sbi->blocks_per_seg; + dcc->discard_granularity = BLKS_PER_SEG(sbi); else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) dcc->discard_granularity = BLKS_PER_SEC(sbi); @@ -2542,7 +2542,7 @@ static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int typ struct curseg_info *curseg = CURSEG_I(sbi, type); if (sbi->ckpt->alloc_type[type] == SSR) - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); return curseg->next_blkoff; } @@ -2630,7 +2630,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, unsigned int segno = curseg->segno + 1; struct free_segmap_info *free_i = FREE_I(sbi); - if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) + if (segno < MAIN_SEGS(sbi) && segno % SEGS_PER_SEC(sbi)) return !test_bit(segno, free_i->free_segmap); return 0; } @@ -2654,7 +2654,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi, spin_lock(&free_i->segmap_lock); - if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { + if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) { segno = find_next_zero_bit(free_i->free_segmap, GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1); if (segno < 
GET_SEG_FROM_SEC(sbi, hint + 1)) @@ -2757,9 +2757,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) sanity_check_seg_type(sbi, seg_type); if (f2fs_need_rand_seg(sbi)) - return get_random_u32_below(MAIN_SECS(sbi) * sbi->segs_per_sec); + return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); - /* if segs_per_sec is large than 1, we need to keep original policy. */ if (__is_large_section(sbi)) return curseg->segno; @@ -2827,7 +2826,7 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi, for (i = 0; i < entries; i++) target_map[i] = ckpt_map[i] | cur_map[i]; - return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); + return __find_rev_next_zero_bit(target_map, BLKS_PER_SEG(sbi), start); } static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi, @@ -2838,7 +2837,7 @@ static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi, bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) { - return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg; + return __next_free_blkoff(sbi, segno, 0) < BLKS_PER_SEG(sbi); } /* @@ -3238,8 +3237,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); if (need_align) { - start_segno = rounddown(start_segno, sbi->segs_per_sec); - end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1; + start_segno = rounddown(start_segno, SEGS_PER_SEC(sbi)); + end_segno = roundup(end_segno + 1, SEGS_PER_SEC(sbi)) - 1; } cpc.reason = CP_DISCARD; @@ -3437,7 +3436,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, } *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); - f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg); + f2fs_bug_on(sbi, curseg->next_blkoff >= BLKS_PER_SEG(sbi)); f2fs_wait_discard_bio(sbi, *new_blkaddr); @@ -3881,7 +3880,7 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) seg_i->next_blkoff = blk_off; if (seg_i->alloc_type == SSR) - blk_off = sbi->blocks_per_seg; + blk_off = BLKS_PER_SEG(sbi); for (j = 0; j < blk_off; j++) { struct f2fs_summary *s; @@ -3949,7 +3948,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) struct f2fs_summary *ns = &sum->entries[0]; int i; - for (i = 0; i < sbi->blocks_per_seg; i++, ns++) { + for (i = 0; i < BLKS_PER_SEG(sbi); i++, ns++) { ns->version = 0; ns->ofs_in_node = 0; } @@ -4582,21 +4581,20 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks; - if (f2fs_block_unit_discard(sbi)) { - /* build discard map only one time */ - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, - SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, - se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += - sbi->blocks_per_seg - - se->valid_blocks; - } - } + if (!f2fs_block_unit_discard(sbi)) + goto init_discard_map_done; + /* build discard map only one time */ + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + goto init_discard_map_done; + } + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += BLKS_PER_SEG(sbi) - + se->valid_blocks; +init_discard_map_done: if (__is_large_section(sbi)) get_sec_entry(sbi, start)->valid_blocks += se->valid_blocks; @@ -4736,7 +4734,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) return; mutex_lock(&dirty_i->seglist_lock); - for (segno = 0; segno < 
MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { + for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { valid_blocks = get_valid_blocks(sbi, segno, true); secno = GET_SEC_FROM_SEG(sbi, segno); @@ -4835,7 +4833,7 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) if (curseg->alloc_type == SSR) continue; - for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) { + for (blkofs += 1; blkofs < BLKS_PER_SEG(sbi); blkofs++) { if (!f2fs_test_bit(blkofs, se->cur_valid_map)) continue; out: @@ -5114,7 +5112,7 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg( unsigned int secno; if (!sbi->unusable_blocks_per_sec) - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); secno = GET_SEC_FROM_SEG(sbi, segno); seg_start = START_BLOCK(sbi, segno); @@ -5129,10 +5127,10 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg( */ if (seg_start >= sec_cap_blkaddr) return 0; - if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr) + if (seg_start + BLKS_PER_SEG(sbi) > sec_cap_blkaddr) return sec_cap_blkaddr - seg_start; - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); } #else int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) @@ -5158,7 +5156,7 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, if (f2fs_sb_has_blkzoned(sbi)) return f2fs_usable_zone_blks_in_seg(sbi, segno); - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); } unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, @@ -5167,7 +5165,7 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, if (f2fs_sb_has_blkzoned(sbi)) return CAP_SEGS_PER_SEC(sbi); - return sbi->segs_per_sec; + return SEGS_PER_SEC(sbi); } /* @@ -5182,14 +5180,14 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) sit_i->min_mtime = ULLONG_MAX; - for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { + for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { unsigned int i; unsigned long long mtime = 0; - for (i = 0; i < sbi->segs_per_sec; i++) + for (i = 0; i < SEGS_PER_SEC(sbi); i++) mtime += get_seg_entry(sbi, segno + i)->mtime; - mtime = div_u64(mtime, sbi->segs_per_sec); + mtime = div_u64(mtime, SEGS_PER_SEC(sbi)); if (sit_i->min_mtime > mtime) sit_i->min_mtime = mtime; @@ -5228,7 +5226,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC); sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; - sm_info->min_seq_blocks = sbi->blocks_per_seg; + sm_info->min_seq_blocks = BLKS_PER_SEG(sbi); sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->min_ssr_sections = reserved_sections(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 8129be788bd5..febcfbadcdfa 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -48,21 +48,21 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, #define IS_CURSEC(sbi, secno) \ (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ - (sbi)->segs_per_sec) || \ + 
SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \ - (sbi)->segs_per_sec) || \ + SEGS_PER_SEC(sbi)) || \ ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \ - (sbi)->segs_per_sec)) + SEGS_PER_SEC(sbi))) #define MAIN_BLKADDR(sbi) \ (SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \ @@ -93,24 +93,22 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg) #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (BLKS_PER_SEG(sbi) - 1)) #define GET_SEGNO(sbi, blk_addr) \ ((!__is_valid_data_blkaddr(blk_addr)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) -#define BLKS_PER_SEC(sbi) \ - ((sbi)->segs_per_sec * (sbi)->blocks_per_seg) #define CAP_BLKS_PER_SEC(sbi) \ - ((sbi)->segs_per_sec * (sbi)->blocks_per_seg - \ + (SEGS_PER_SEC(sbi) * BLKS_PER_SEG(sbi) - \ (sbi)->unusable_blocks_per_sec) #define CAP_SEGS_PER_SEC(sbi) \ - ((sbi)->segs_per_sec - ((sbi)->unusable_blocks_per_sec >>\ + (SEGS_PER_SEC(sbi) - ((sbi)->unusable_blocks_per_sec >> \ (sbi)->log_blocks_per_seg)) #define GET_SEC_FROM_SEG(sbi, segno) \ - (((segno) == -1) ? -1 : (segno) / (sbi)->segs_per_sec) + (((segno) == -1) ? -1 : (segno) / SEGS_PER_SEC(sbi)) #define GET_SEG_FROM_SEC(sbi, secno) \ - ((secno) * (sbi)->segs_per_sec) + ((secno) * SEGS_PER_SEC(sbi)) #define GET_ZONE_FROM_SEC(sbi, secno) \ (((secno) == -1) ? -1 : (secno) / (sbi)->secs_per_zone) #define GET_ZONE_FROM_SEG(sbi, segno) \ @@ -364,7 +362,7 @@ static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, unsigned int blocks = 0; int i; - for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) { + for (i = 0; i < SEGS_PER_SEC(sbi); i++, start_segno++) { struct seg_entry *se = get_seg_entry(sbi, start_segno); blocks += se->ckpt_valid_blocks; @@ -449,7 +447,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) free_i->free_segments++; next = find_next_bit(free_i->free_segmap, - start_segno + sbi->segs_per_sec, start_segno); + start_segno + SEGS_PER_SEC(sbi), start_segno); if (next >= start_segno + usable_segs) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; @@ -485,7 +483,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, if (!inmem && IS_CURSEC(sbi, secno)) goto skip_free; next = find_next_bit(free_i->free_segmap, - start_segno + sbi->segs_per_sec, start_segno); + start_segno + SEGS_PER_SEC(sbi), start_segno); if (next >= start_segno + usable_segs) { if (test_and_clear_bit(secno, free_i->free_secmap)) free_i->free_sections++; @@ -793,10 +791,10 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, return -EFSCORRUPTED; } - if (usable_blks_per_seg < sbi->blocks_per_seg) + if (usable_blks_per_seg < BLKS_PER_SEG(sbi)) f2fs_bug_on(sbi, find_next_bit_le(&raw_sit->valid_map, - sbi->blocks_per_seg, - usable_blks_per_seg) != sbi->blocks_per_seg); + BLKS_PER_SEG(sbi), + usable_blks_per_seg) != BLKS_PER_SEG(sbi)); /* check segment usage, and check boundary of a given segment number */ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg @@ -915,9 +913,9 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) return 0; if (type == DATA) - return sbi->blocks_per_seg; + return BLKS_PER_SEG(sbi); else if (type == NODE) - return 8 * sbi->blocks_per_seg; + return 8 * BLKS_PER_SEG(sbi); 
else if (type == META) return 8 * BIO_MAX_VECS; else diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f1516fd5088a..c0688c124aa7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3643,7 +3643,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) } main_segs = le32_to_cpu(raw_super->segment_count_main); - blocks_per_seg = sbi->blocks_per_seg; + blocks_per_seg = BLKS_PER_SEG(sbi); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || @@ -3756,8 +3756,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); sbi->total_sections = le32_to_cpu(raw_super->section_count); sbi->total_node_count = - (le32_to_cpu(raw_super->segment_count_nat) / 2) - * sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK; + ((le32_to_cpu(raw_super->segment_count_nat) / 2) * + NAT_ENTRY_PER_BLOCK) << sbi->log_blocks_per_seg; F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino); F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino); F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino); @@ -3766,7 +3766,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->next_victim_seg[BG_GC] = NULL_SEGNO; sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; - sbi->migration_granularity = sbi->segs_per_sec; + sbi->migration_granularity = SEGS_PER_SEC(sbi); sbi->seq_file_ra_mul = MIN_RA_MUL; sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE; sbi->max_fragment_hole = DEF_FRAGMENT_SIZE; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index a7ec55c7bb20..906d2af2d849 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -493,8 +493,8 @@ out: spin_lock(&sbi->stat_lock); if (t > (unsigned long)(sbi->user_block_count - F2FS_OPTION(sbi).root_reserved_blocks - - sbi->blocks_per_seg * - SM_I(sbi)->additional_reserved_segments)) { + (SM_I(sbi)->additional_reserved_segments << + sbi->log_blocks_per_seg))) { spin_unlock(&sbi->stat_lock); return -EINVAL; } @@ -551,7 +551,7 @@ out: } if (!strcmp(a->attr.name, "migration_granularity")) { - if (t == 0 || t > sbi->segs_per_sec) + if (t == 0 || t > SEGS_PER_SEC(sbi)) return -EINVAL; } From 2f0209f579d12bd0ea43a01a8696e30a8eeec1da Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Tue, 6 Feb 2024 14:32:55 -0800 Subject: [PATCH 19/60] f2fs: separate f2fs_gc_range() to use GC for a range Make f2fs_gc_range() an external function so it can be used to run GC over a given range.
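In outline, the new helper carries over the per-section loop that free_segment_range() used to open-code; the sketch below simply mirrors the gc.c hunk that follows, shown here only to make the control flow easier to read:

	/* illustrative sketch of f2fs_gc_range(), matching the hunk below */
	for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) {
		struct gc_inode_list gc_list = {
			.ilist = LIST_HEAD_INIT(gc_list.ilist),
			.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
		};

		do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
		put_gc_inode(&gc_list);

		if (!dry_run && get_valid_blocks(sbi, segno, true))
			return -EAGAIN;		/* section still holds valid blocks */

		if (fatal_signal_pending(current))
			return -ERESTARTSYS;
	}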
Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 51 +++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 46d67d7dc913..7222876229ea 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1975,10 +1975,34 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) init_atgc_management(sbi); } -static int free_segment_range(struct f2fs_sb_info *sbi, - unsigned int secs, bool gc_only) +static int f2fs_gc_range(struct f2fs_sb_info *sbi, + unsigned int start_seg, unsigned int end_seg, bool dry_run) { - unsigned int segno, next_inuse, start, end; + unsigned int segno; + + for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) { + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), + }; + + do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); + put_gc_inode(&gc_list); + + if (!dry_run && get_valid_blocks(sbi, segno, true)) + return -EAGAIN; + + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + } + + return 0; +} + +static int free_segment_range(struct f2fs_sb_info *sbi, + unsigned int secs, bool dry_run) +{ + unsigned int next_inuse, start, end; struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; int gc_mode, gc_type; int err = 0; @@ -2004,25 +2028,8 @@ static int free_segment_range(struct f2fs_sb_info *sbi, f2fs_allocate_segment_for_resize(sbi, type, start, end); /* do GC to move out valid blocks in the range */ - for (segno = start; segno <= end; segno += SEGS_PER_SEC(sbi)) { - struct gc_inode_list gc_list = { - .ilist = LIST_HEAD_INIT(gc_list.ilist), - .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), - }; - - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); - put_gc_inode(&gc_list); - - if (!gc_only && get_valid_blocks(sbi, segno, true)) { - err = -EAGAIN; - goto out; - } - if (fatal_signal_pending(current)) { - err = -ERESTARTSYS; - goto out; - } - } - if (gc_only) + err = f2fs_gc_range(sbi, start, end, dry_run); + if (err || dry_run) goto out; stat_inc_cp_call_count(sbi, TOTAL_CALL); From 40b2d55e045222dd6de2a54a299f682e0f954b03 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 7 Feb 2024 15:05:48 +0800 Subject: [PATCH 20/60] f2fs: fix to create selinux label during whiteout initialization generic/700 - output mismatch (see /media/fstests/results//generic/700.out.bad) --- tests/generic/700.out 2023-03-28 10:40:42.735529223 +0000 +++ /media/fstests/results//generic/700.out.bad 2024-02-06 04:37:56.000000000 +0000 @@ -1,2 +1,4 @@ QA output created by 700 +/mnt/scratch_f2fs/f1: security.selinux: No such attribute +/mnt/scratch_f2fs/f2: security.selinux: No such attribute Silence is golden ... (Run 'diff -u /media/fstests/tests/generic/700.out /media/fstests/results//generic/700.out.bad' to see the entire diff) HINT: You _MAY_ be missing kernel fix: 70b589a37e1a xfs: add selinux labels to whiteout inodes Previously, the whiteout inode initialization path missed creating selinux labels; fix this issue.
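The shape of the fix, mirrored from the namei.c hunk below: f2fs_rename() now sets up an f2fs_filename for the renamed entry and passes it down through f2fs_create_whiteout() and __f2fs_tmpfile(), so f2fs_init_inode_metadata() can create the security label together with the whiteout inode. Illustrative sketch only:

	/* in the RENAME_WHITEOUT path of f2fs_rename() (sketch of the hunk below) */
	struct f2fs_filename fname;

	err = f2fs_setup_filename(old_dir, &old_dentry->d_name, 0, &fname);
	if (err)
		return err;

	err = f2fs_create_whiteout(idmap, old_dir, &whiteout, &fname);
	if (err)
		return err;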
Fixes: 7e01e7ad746b ("f2fs: support RENAME_WHITEOUT") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 5 +++-- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/namei.c | 25 +++++++++++++++++-------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 3f20d94e12f9..02c9355176d3 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -830,13 +830,14 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, return err; } -int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) +int f2fs_do_tmpfile(struct inode *inode, struct inode *dir, + struct f2fs_filename *fname) { struct page *page; int err = 0; f2fs_down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, NULL, NULL); + page = f2fs_init_inode_metadata(inode, dir, fname, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 50e7890cc6a5..cef15775cb33 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3576,7 +3576,8 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode); void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, struct inode *inode); -int f2fs_do_tmpfile(struct inode *inode, struct inode *dir); +int f2fs_do_tmpfile(struct inode *inode, struct inode *dir, + struct f2fs_filename *fname); bool f2fs_empty_dir(struct inode *dir); static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index ba11298b7837..0875602e2406 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -852,7 +852,7 @@ out: static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode, bool is_whiteout, - struct inode **new_inode) + struct inode **new_inode, struct f2fs_filename *fname) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; @@ -880,7 +880,7 @@ static int __f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, if (err) goto out; - err = f2fs_do_tmpfile(inode, dir); + err = f2fs_do_tmpfile(inode, dir, fname); if (err) goto release_out; @@ -931,22 +931,24 @@ static int f2fs_tmpfile(struct mnt_idmap *idmap, struct inode *dir, if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL); + err = __f2fs_tmpfile(idmap, dir, file, mode, false, NULL, NULL); return finish_open_simple(file, err); } static int f2fs_create_whiteout(struct mnt_idmap *idmap, - struct inode *dir, struct inode **whiteout) + struct inode *dir, struct inode **whiteout, + struct f2fs_filename *fname) { - return __f2fs_tmpfile(idmap, dir, NULL, - S_IFCHR | WHITEOUT_MODE, true, whiteout); + return __f2fs_tmpfile(idmap, dir, NULL, S_IFCHR | WHITEOUT_MODE, + true, whiteout, fname); } int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct inode **new_inode) { - return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG, false, new_inode); + return __f2fs_tmpfile(idmap, dir, NULL, S_IFREG, + false, new_inode, NULL); } static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, @@ -990,7 +992,14 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, } if (flags & RENAME_WHITEOUT) { - err = f2fs_create_whiteout(idmap, old_dir, &whiteout); + struct f2fs_filename fname; + + err = f2fs_setup_filename(old_dir, &old_dentry->d_name, + 0, &fname); + if (err) + return err; + + err = f2fs_create_whiteout(idmap, old_dir, &whiteout, &fname); if (err) return err; } 
From 1ff61a3205185e4a7b61a9fb9ab1add1731f662b Mon Sep 17 00:00:00 2001 From: HuangXiaojia Date: Thu, 1 Feb 2024 15:38:58 +0800 Subject: [PATCH 21/60] f2fs: Use folio in f2fs_read_merkle_tree_page Use folio in f2fs_read_merkle_tree_page to reduce folio and page conversions in the find_get_page_flags and read_mapping_page paths. But the return value should still be the exact page. Signed-off-by: HuangXiaojia Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/verity.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 4fc95f353a7a..f7bb0c54502c 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -258,21 +258,23 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) { - struct page *page; + struct folio *folio; index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; - page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); - if (!page || !PageUptodate(page)) { + folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); + if (IS_ERR(folio) || !folio_test_uptodate(folio)) { DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); - if (page) - put_page(page); + if (!IS_ERR(folio)) + folio_put(folio); else if (num_ra_pages > 1) page_cache_ra_unbounded(&ractl, num_ra_pages, 0); - page = read_mapping_page(inode->i_mapping, index, NULL); + folio = read_mapping_folio(inode->i_mapping, index, NULL); + if (IS_ERR(folio)) + return ERR_CAST(folio); } - return page; + return folio_file_page(folio, index); } static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf, From e39602da752cd1d0462e3fa04074146f6f2803f6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 13 Feb 2024 00:08:18 +0800 Subject: [PATCH 22/60] f2fs: compress: fix to check zstd compress level correctly in mount option Due to its on-disk layout, f2fs only supports configuring the zstd compress level with a positive number. However, since commit e0c1b49f5b67 ("lib: zstd: Upgrade to latest upstream zstd version 1.4.10"), zstd supports negative compress levels, so zstd_min_clevel() may return a negative number. With the mount option below, .compress_level could then be configured with a negative value, which f2fs does not allow; add a check to prevent it. mount -o compress_algorithm=zstd:4294967295 /dev/sdx /mnt/f2fs Fixes: 00e120b5e4b5 ("f2fs: assign default compression level") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c0688c124aa7..867b147eb957 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -627,7 +627,7 @@ static int f2fs_set_lz4hc_level(struct f2fs_sb_info *sbi, const char *str) #ifdef CONFIG_F2FS_FS_ZSTD static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) { - unsigned int level; + int level; int len = 4; if (strlen(str) == len) { @@ -641,9 +641,15 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) f2fs_info(sbi, "wrong format, e.g. 
:"); return -EINVAL; } - if (kstrtouint(str + 1, 10, &level)) + if (kstrtoint(str + 1, 10, &level)) return -EINVAL; + /* f2fs does not support negative compress level now */ + if (level < 0) { + f2fs_info(sbi, "do not support negative compress level: %d", level); + return -ERANGE; + } + if (!f2fs_is_compress_level_valid(COMPRESS_ZSTD, level)) { f2fs_info(sbi, "invalid zstd compress level: %d", level); return -EINVAL; From a94c7fded76bfd1a061deae7be80fedbfa26774e Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Thu, 15 Feb 2024 12:16:33 -0800 Subject: [PATCH 23/60] f2fs: support SEEK_DATA and SEEK_HOLE for compression files Fix to support SEEK_DATA and SEEK_HOLE for compression files Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 767d16c74bb6..f830f88a025d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -394,9 +394,20 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return f2fs_do_sync_file(file, start, end, datasync, false); } -static bool __found_offset(struct address_space *mapping, block_t blkaddr, - pgoff_t index, int whence) +static bool __found_offset(struct address_space *mapping, + struct dnode_of_data *dn, pgoff_t index, int whence) { + block_t blkaddr = f2fs_data_blkaddr(dn); + struct inode *inode = mapping->host; + bool compressed_cluster = false; + + if (f2fs_compressed_file(inode)) { + block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page, + ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size)); + + compressed_cluster = first_blkaddr == COMPRESS_ADDR; + } + switch (whence) { case SEEK_DATA: if (__is_valid_data_blkaddr(blkaddr)) @@ -404,8 +415,12 @@ static bool __found_offset(struct address_space *mapping, block_t blkaddr, if (blkaddr == NEW_ADDR && xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY)) return true; + if (compressed_cluster) + return true; break; case SEEK_HOLE: + if (compressed_cluster) + return false; if (blkaddr == NULL_ADDR) return true; break; @@ -474,7 +489,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) goto fail; } - if (__found_offset(file->f_mapping, blkaddr, + if (__found_offset(file->f_mapping, &dn, pgofs, whence)) { f2fs_put_dnode(&dn); goto found; From 3ae768a132c70151fd5e6cbeea8a9e6bf08a1f0c Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 2 Feb 2024 09:52:08 -0700 Subject: [PATCH 24/60] f2fs: doc: Fix bouncing email address for Sahitya Tummala The servers for the @codeaurora domain are long retired and any messages addressed there will bounce. Sahitya Tummala has a .mailmap entry to an updated address, but the documentation files still list @codeaurora which might be a problem for anyone reading the documentation directly. Update the documentation files to match the .mailmap update. 
Signed-off-by: Jeffrey Hugo Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 48c135e24eb5..22d070c0de40 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -205,7 +205,7 @@ Description: Controls the idle timing of system, if there is no FS operation What: /sys/fs/f2fs//discard_idle_interval Date: September 2018 Contact: "Chao Yu" -Contact: "Sahitya Tummala" +Contact: "Sahitya Tummala" Description: Controls the idle timing of discard thread given this time interval. Default is 5 secs. @@ -213,7 +213,7 @@ Description: Controls the idle timing of discard thread given What: /sys/fs/f2fs//gc_idle_interval Date: September 2018 Contact: "Chao Yu" -Contact: "Sahitya Tummala" +Contact: "Sahitya Tummala" Description: Controls the idle timing for gc path. Set to 5 seconds by default. What: /sys/fs/f2fs//iostat_enable From 4e0197f9932f70cc7be8744aa0ed4dd9b5d97d85 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 20 Feb 2024 12:48:44 -0800 Subject: [PATCH 25/60] f2fs: kill heap-based allocation No one uses this feature. Let's kill it. Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.rst | 4 +-- fs/f2fs/gc.c | 5 ++- fs/f2fs/segment.c | 54 ++++-------------------------- fs/f2fs/segment.h | 10 ------ fs/f2fs/super.c | 9 +---- 5 files changed, 11 insertions(+), 71 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 9ac5083dae8e..8930d9ea8c4e 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -126,9 +126,7 @@ norecovery Disable the roll-forward recovery routine, mounted read- discard/nodiscard Enable/disable real-time discard in f2fs, if discard is enabled, f2fs will issue discard/TRIM commands when a segment is cleaned. -no_heap Disable heap-style segment allocation which finds free - segments for data from the beginning of main area, while - for node from the end of main area. +heap/no_heap Deprecated. nouser_xattr Disable Extended User Attributes. Note: xattr is enabled by default if CONFIG_F2FS_FS_XATTR is selected. noacl Disable POSIX Access Control List. Note: acl is enabled diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 7222876229ea..c97d80e3b726 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -280,12 +280,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; - /* let's select beginning hot/small space first in no_heap mode*/ + /* let's select beginning hot/small space first. 
*/ if (f2fs_need_rand_seg(sbi)) p->offset = get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi)); - else if (test_opt(sbi, NOHEAP) && - (type == CURSEG_HOT_DATA || IS_NODESEG(type))) + else if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) p->offset = 0; else p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1518f1287c28..471fa7aa053a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2640,16 +2640,14 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, * This function should be returned with success, otherwise BUG */ static void get_new_segment(struct f2fs_sb_info *sbi, - unsigned int *newseg, bool new_sec, int dir) + unsigned int *newseg, bool new_sec) { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno, secno, zoneno; unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg); unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg); - unsigned int left_start = hint; bool init = true; - int go_left = 0; int i; spin_lock(&free_i->segmap_lock); @@ -2663,30 +2661,10 @@ static void get_new_segment(struct f2fs_sb_info *sbi, find_other_zone: secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); if (secno >= MAIN_SECS(sbi)) { - if (dir == ALLOC_RIGHT) { - secno = find_first_zero_bit(free_i->free_secmap, + secno = find_first_zero_bit(free_i->free_secmap, MAIN_SECS(sbi)); - f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); - } else { - go_left = 1; - left_start = hint - 1; - } + f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); } - if (go_left == 0) - goto skip_left; - - while (test_bit(left_start, free_i->free_secmap)) { - if (left_start > 0) { - left_start--; - continue; - } - left_start = find_first_zero_bit(free_i->free_secmap, - MAIN_SECS(sbi)); - f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); - break; - } - secno = left_start; -skip_left: segno = GET_SEG_FROM_SEC(sbi, secno); zoneno = GET_ZONE_FROM_SEC(sbi, secno); @@ -2697,21 +2675,13 @@ skip_left: goto got_it; if (zoneno == old_zoneno) goto got_it; - if (dir == ALLOC_LEFT) { - if (!go_left && zoneno + 1 >= total_zones) - goto got_it; - if (go_left && zoneno == 0) - goto got_it; - } for (i = 0; i < NR_CURSEG_TYPE; i++) if (CURSEG_I(sbi, i)->zone == zoneno) break; if (i < NR_CURSEG_TYPE) { /* zone is in user, try another */ - if (go_left) - hint = zoneno * sbi->secs_per_zone - 1; - else if (zoneno + 1 >= total_zones) + if (zoneno + 1 >= total_zones) hint = 0; else hint = (zoneno + 1) * sbi->secs_per_zone; @@ -2769,8 +2739,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return 0; - if (test_opt(sbi, NOHEAP) && - (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type))) + if (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)) return 0; if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) @@ -2790,21 +2759,12 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) { struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned short seg_type = curseg->seg_type; unsigned int segno = curseg->segno; - int dir = ALLOC_LEFT; if (curseg->inited) - write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, segno)); - if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA) - dir = ALLOC_RIGHT; - - if (test_opt(sbi, NOHEAP)) - dir = ALLOC_RIGHT; - + write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); segno = 
__get_next_segno(sbi, type); - get_new_segment(sbi, &segno, new_sec, dir); + get_new_segment(sbi, &segno, new_sec); curseg->next_segno = segno; reset_curseg(sbi, type, 1); curseg->alloc_type = LFS; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index febcfbadcdfa..abfb00955b60 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -136,16 +136,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, #define SECTOR_TO_BLOCK(sectors) \ ((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK) -/* - * indicate a block allocation direction: RIGHT and LEFT. - * RIGHT means allocating new sections towards the end of volume. - * LEFT means the opposite direction. - */ -enum { - ALLOC_RIGHT = 0, - ALLOC_LEFT -}; - /* * In the victim_sel_policy->alloc_mode, there are three block allocation modes. * LFS writes data sequentially with cleaning operations. diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 867b147eb957..329f317e6f09 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -733,10 +733,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) clear_opt(sbi, DISCARD); break; case Opt_noheap: - set_opt(sbi, NOHEAP); - break; case Opt_heap: - clear_opt(sbi, NOHEAP); + f2fs_warn(sbi, "heap/no_heap options were deprecated"); break; #ifdef CONFIG_F2FS_FS_XATTR case Opt_user_xattr: @@ -1962,10 +1960,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) } else { seq_puts(seq, ",nodiscard"); } - if (test_opt(sbi, NOHEAP)) - seq_puts(seq, ",no_heap"); - else - seq_puts(seq, ",heap"); #ifdef CONFIG_F2FS_FS_XATTR if (test_opt(sbi, XATTR_USER)) seq_puts(seq, ",user_xattr"); @@ -2142,7 +2136,6 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount) set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); - set_opt(sbi, NOHEAP); set_opt(sbi, MERGE_CHECKPOINT); F2FS_OPTION(sbi).unusable_cap = 0; sbi->sb->s_flags |= SB_LAZYTIME; From 9703d69d9d153bb230711d0d577454552aeb13d4 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Tue, 13 Feb 2024 09:38:12 -0800 Subject: [PATCH 26/60] f2fs: support file pinning for zoned devices Support file pinning with conventional storage area for zoned devices Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 58 ++++++++++++++++++++++++++------------- fs/f2fs/f2fs.h | 17 +++++++++++- fs/f2fs/file.c | 24 ++++++++++++----- fs/f2fs/gc.c | 14 +++++++--- fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++++++++++------ fs/f2fs/segment.h | 10 +++++++ 6 files changed, 154 insertions(+), 38 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 828c797cd47c..0c9aa3082fcf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, unsigned int blkofs; unsigned int blk_per_sec = BLKS_PER_SEC(sbi); unsigned int secidx = start_blk / blk_per_sec; - unsigned int end_sec = secidx + blkcnt / blk_per_sec; + unsigned int end_sec; int ret = 0; + if (!blkcnt) + return 0; + end_sec = secidx + (blkcnt - 1) / blk_per_sec; + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); set_inode_flag(inode, FI_ALIGNED_WRITE); set_inode_flag(inode, FI_OPU_WRITE); - for (; secidx < end_sec; secidx++) { + for (; secidx <= end_sec; secidx++) { + unsigned int blkofs_end = secidx == end_sec ? 
+ (blkcnt - 1) % blk_per_sec : blk_per_sec - 1; + f2fs_down_write(&sbi->pin_sem); - f2fs_lock_op(sbi); - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); - f2fs_unlock_op(sbi); + ret = f2fs_allocate_pinning_section(sbi); + if (ret) { + f2fs_up_write(&sbi->pin_sem); + break; + } set_inode_flag(inode, FI_SKIP_WRITES); - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) { struct page *page; unsigned int blkidx = secidx * blk_per_sec + blkofs; @@ -3946,27 +3955,34 @@ retry: nr_pblocks = map.m_len; if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || - nr_pblocks & sec_blks_mask) { + nr_pblocks & sec_blks_mask || + !f2fs_valid_pinned_area(sbi, pblock)) { + bool last_extent = false; + not_aligned++; nr_pblocks = roundup(nr_pblocks, blks_per_sec); if (cur_lblock + nr_pblocks > sis->max) nr_pblocks -= blks_per_sec; + /* this extent is last one */ if (!nr_pblocks) { - /* this extent is last one */ - nr_pblocks = map.m_len; - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section"); - goto next; + nr_pblocks = last_lblock - cur_lblock; + last_extent = true; } ret = f2fs_migrate_blocks(inode, cur_lblock, nr_pblocks); - if (ret) + if (ret) { + if (ret == -ENOENT) + ret = -EINVAL; goto out; - goto retry; + } + + if (!last_extent) + goto retry; } -next: + if (cur_lblock + nr_pblocks >= sis->max) nr_pblocks = sis->max - cur_lblock; @@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { struct inode *inode = file_inode(file); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; if (!S_ISREG(inode->i_mode)) return -EINVAL; - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + if (f2fs_readonly(sbi->sb)) return -EROFS; - if (f2fs_lfs_mode(F2FS_I_SB(inode))) { - f2fs_err(F2FS_I_SB(inode), - "Swapfile not supported in LFS mode"); + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) { + f2fs_err(sbi, "Swapfile not supported in LFS mode"); return -EINVAL; } @@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, f2fs_precache_extents(inode); + ret = filemap_fdatawrite(inode->i_mapping); + if (ret < 0) + return ret; + ret = check_swap_activate(sis, file, span); if (ret < 0) return ret; stat_inc_swapfile_inode(inode); set_inode_flag(inode, FI_PIN_FILE); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + f2fs_update_time(sbi, REQ_TIME); return ret; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cef15775cb33..b2c44d2b3c9d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3699,7 +3699,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi, unsigned int *newseg, bool new_sec, int dir); void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, @@ -3877,6 +3878,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); +int f2fs_gc_range(struct f2fs_sb_info *sbi, + 
unsigned int start_seg, unsigned int end_seg, + bool dry_run, unsigned int dry_run_sections); int f2fs_resize_fs(struct file *filp, __u64 block_count); int __init f2fs_create_garbage_collection_cache(void); void f2fs_destroy_garbage_collection_cache(void); @@ -4531,6 +4535,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } +static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi, + block_t blkaddr) +{ + if (f2fs_sb_has_blkzoned(sbi)) { + int devi = f2fs_target_device_index(sbi, blkaddr); + + return !bdev_is_zoned(FDEV(devi).bdev); + } + return true; +} + static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) { return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f830f88a025d..891fa359f7e0 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1748,9 +1748,11 @@ next_alloc: f2fs_down_write(&sbi->pin_sem); - f2fs_lock_op(sbi); - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); - f2fs_unlock_op(sbi); + err = f2fs_allocate_pinning_section(sbi); + if (err) { + f2fs_up_write(&sbi->pin_sem); + goto out_err; + } map.m_seg_type = CURSEG_COLD_DATA_PINNED; err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); @@ -3200,6 +3202,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); __u32 pin; int ret = 0; @@ -3209,7 +3212,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) if (!S_ISREG(inode->i_mode)) return -EINVAL; - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + if (f2fs_readonly(sbi->sb)) return -EROFS; ret = mnt_want_write_file(filp); @@ -3222,9 +3225,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) clear_inode_flag(inode, FI_PIN_FILE); f2fs_i_gc_failures_write(inode, 0); goto done; + } else if (f2fs_is_pinned_file(inode)) { + goto done; } - if (f2fs_should_update_outplace(inode, NULL)) { + if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) { + ret = -EFBIG; + goto out; + } + + /* Let's allow file pinning on zoned device. 
*/ + if (!f2fs_sb_has_blkzoned(sbi) && + f2fs_should_update_outplace(inode, NULL)) { ret = -EINVAL; goto out; } @@ -3246,7 +3258,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) set_inode_flag(inode, FI_PIN_FILE); ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; done: - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); mnt_drop_write_file(filp); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c97d80e3b726..d194097c3da0 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1974,10 +1974,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) init_atgc_management(sbi); } -static int f2fs_gc_range(struct f2fs_sb_info *sbi, - unsigned int start_seg, unsigned int end_seg, bool dry_run) +int f2fs_gc_range(struct f2fs_sb_info *sbi, + unsigned int start_seg, unsigned int end_seg, + bool dry_run, unsigned int dry_run_sections) { unsigned int segno; + unsigned int gc_secs = dry_run_sections; for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) { struct gc_inode_list gc_list = { @@ -1985,11 +1987,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); + do_garbage_collect(sbi, segno, &gc_list, FG_GC, + dry_run_sections == 0); put_gc_inode(&gc_list); if (!dry_run && get_valid_blocks(sbi, segno, true)) return -EAGAIN; + if (dry_run && dry_run_sections && + !get_valid_blocks(sbi, segno, true) && --gc_secs == 0) + break; if (fatal_signal_pending(current)) return -ERESTARTSYS; @@ -2027,7 +2033,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, f2fs_allocate_segment_for_resize(sbi, type, start, end); /* do GC to move out valid blocks in the range */ - err = f2fs_gc_range(sbi, start, end, dry_run); + err = f2fs_gc_range(sbi, start, end, dry_run, 0); if (err || dry_run) goto out; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 471fa7aa053a..01067254ec40 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2640,7 +2640,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, * This function should be returned with success, otherwise BUG */ static void get_new_segment(struct f2fs_sb_info *sbi, - unsigned int *newseg, bool new_sec) + unsigned int *newseg, bool new_sec, bool pinning) { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno, secno, zoneno; @@ -2658,6 +2658,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi, if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) goto got_it; } + + /* + * If we format f2fs on zoned storage, let's try to get pinned sections + * from beginning of the storage, which should be a conventional one. + */ + if (f2fs_sb_has_blkzoned(sbi)) { + segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg); + hint = GET_SEC_FROM_SEG(sbi, segno); + } + find_other_zone: secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); if (secno >= MAIN_SECS(sbi)) { @@ -2756,21 +2766,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) * Allocate a current working segment. * This function always allocates a free segment in LFS manner. 
*/ -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int segno = curseg->segno; + bool pinning = type == CURSEG_COLD_DATA_PINNED; if (curseg->inited) write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); + segno = __get_next_segno(sbi, type); - get_new_segment(sbi, &segno, new_sec); + get_new_segment(sbi, &segno, new_sec, pinning); + if (new_sec && pinning && + !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { + __set_free(sbi, segno); + return -EAGAIN; + } + curseg->next_segno = segno; reset_curseg(sbi, type, 1); curseg->alloc_type = LFS; if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) curseg->fragment_remained_chunk = get_random_u32_inclusive(1, sbi->max_fragment_chunk); + return 0; } static int __next_free_blkoff(struct f2fs_sb_info *sbi, @@ -3043,7 +3062,7 @@ unlock: f2fs_up_read(&SM_I(sbi)->curseg_lock); } -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, bool new_sec, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -3053,21 +3072,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, !curseg->next_blkoff && !get_valid_blocks(sbi, curseg->segno, new_sec) && !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) - return; + return 0; old_segno = curseg->segno; - new_curseg(sbi, type, true); + if (new_curseg(sbi, type, true)) + return -EAGAIN; stat_inc_seg_type(sbi, curseg); locate_dirty_segment(sbi, old_segno); + return 0; } -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) { + int ret; + f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); - __allocate_new_segment(sbi, type, true, force); + ret = __allocate_new_segment(sbi, type, true, force); up_write(&SIT_I(sbi)->sentry_lock); f2fs_up_read(&SM_I(sbi)->curseg_lock); + + return ret; +} + +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi) +{ + int err; + bool gc_required = true; + +retry: + f2fs_lock_op(sbi); + err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); + f2fs_unlock_op(sbi); + + if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) { + f2fs_down_write(&sbi->gc_lock); + f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1); + f2fs_up_write(&sbi->gc_lock); + + gc_required = false; + goto retry; + } + + return err; } void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) @@ -3433,6 +3480,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, * new segment. 
*/ if (segment_full) { + if (type == CURSEG_COLD_DATA_PINNED && + !((curseg->segno + 1) % sbi->segs_per_sec)) + goto skip_new_segment; + if (from_gc) { get_atssr_segment(sbi, type, se->type, AT_SSR, se->mtime); @@ -3444,6 +3495,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, stat_inc_seg_type(sbi, curseg); } } + +skip_new_segment: /* * segment dirty status should be updated after segment allocation, * so we just need to update status only one time after previous diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index abfb00955b60..bd3594968c5f 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -957,3 +957,13 @@ wake_up: dcc->discard_wake = true; wake_up_interruptible_all(&dcc->discard_wait_queue); } + +static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi) +{ + int devi; + + for (devi = 0; devi < sbi->s_ndevs; devi++) + if (bdev_is_zoned(FDEV(devi).bdev)) + return GET_SEGNO(sbi, FDEV(devi).start_blk); + return 0; +} From 0f1c6ede6da9f7c5dd7380b74a64850298279168 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Feb 2024 11:15:15 +0800 Subject: [PATCH 27/60] f2fs: introduce get_available_block_count() for cleanup There are very similar codes in inc_valid_block_count() and inc_valid_node_count() which is used for available user block count calculation. This patch introduces a new helper get_available_block_count() to include those common codes, and used it to clean up codes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b2c44d2b3c9d..9a3f6ea51e5a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2280,6 +2280,27 @@ static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi, return false; } +static inline unsigned int get_available_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, bool cap) +{ + block_t avail_user_block_count; + + avail_user_block_count = sbi->user_block_count - + sbi->current_reserved_blocks; + + if (!__allow_reserved_blocks(sbi, inode, cap)) + avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; + + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (avail_user_block_count > sbi->unusable_block_count) + avail_user_block_count -= sbi->unusable_block_count; + else + avail_user_block_count = 0; + } + + return avail_user_block_count; +} + static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count, bool partial) @@ -2305,18 +2326,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); sbi->total_valid_block_count += (block_t)(*count); - avail_user_block_count = sbi->user_block_count - - sbi->current_reserved_blocks; + avail_user_block_count = get_available_block_count(sbi, inode, true); - if (!__allow_reserved_blocks(sbi, inode, true)) - avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; - - if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { - if (avail_user_block_count > sbi->unusable_block_count) - avail_user_block_count -= sbi->unusable_block_count; - else - avail_user_block_count = 0; - } if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { if (!partial) { spin_unlock(&sbi->stat_lock); @@ -2612,7 +2623,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, struct inode *inode, bool 
is_inode) { block_t valid_block_count; - unsigned int valid_node_count, user_block_count; + unsigned int valid_node_count; + unsigned int avail_user_block_count; int err; if (is_inode) { @@ -2632,17 +2644,10 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); - valid_block_count = sbi->total_valid_block_count + - sbi->current_reserved_blocks + 1; + valid_block_count = sbi->total_valid_block_count + 1; + avail_user_block_count = get_available_block_count(sbi, inode, false); - if (!__allow_reserved_blocks(sbi, inode, false)) - valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; - - user_block_count = sbi->user_block_count; - if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) - user_block_count -= sbi->unusable_block_count; - - if (unlikely(valid_block_count > user_block_count)) { + if (unlikely(valid_block_count > avail_user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; } From e24e8333d0f3110473c6e6a20afd51911984e901 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 22 Feb 2024 20:18:48 +0800 Subject: [PATCH 28/60] f2fs: delete f2fs_get_new_segment() declaration Commit 093749e296e2 ("f2fs: support age threshold based garbage collection") added this declaration, but w/ definition, delete it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9a3f6ea51e5a..68a5003014b7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3700,8 +3700,6 @@ bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno); void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi); void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi); void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi); -void f2fs_get_new_segment(struct f2fs_sb_info *sbi, - unsigned int *newseg, bool new_sec, int dir); void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); From f9e28904e6442019043a8e94ec6747a064d06003 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Tue, 20 Feb 2024 14:11:24 +0800 Subject: [PATCH 29/60] f2fs: stop checkpoint when get a out-of-bounds segment There is low probability that an out-of-bounds segment will be got on a small-capacity device. In order to prevent subsequent write requests allocating block address from this invalid segment, which may cause unexpected issue, stop checkpoint should be performed. Also introduce a new stop cp reason: STOP_CP_REASON_NO_SEGMENT. Note, f2fs_stop_checkpoint(, false) is complex and it may sleep, so we should move it outside segmap_lock spinlock coverage in get_new_segment(). 
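For reference, the shape of the error path (abridged from the hunks below; only the relevant
lines are kept): the failure is recorded under the spinlock and acted on only after the unlock,
since stopping the checkpoint may sleep.

	spin_lock(&free_i->segmap_lock);
	...
	secno = find_first_zero_bit(free_i->free_secmap, MAIN_SECS(sbi));
	if (secno >= MAIN_SECS(sbi)) {
		/* record the failure; never sleep while holding segmap_lock */
		ret = -ENOSPC;
		goto out_unlock;
	}
	...
out_unlock:
	spin_unlock(&free_i->segmap_lock);

	if (ret) {
		/* safe to sleep here: the spinlock has been dropped */
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT);
		f2fs_bug_on(sbi, 1);
	}
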
Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 12 +++++++++++- include/linux/f2fs_fs.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 01067254ec40..dc511630eaa5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2649,6 +2649,7 @@ static void get_new_segment(struct f2fs_sb_info *sbi, unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg); bool init = true; int i; + int ret = 0; spin_lock(&free_i->segmap_lock); @@ -2673,7 +2674,10 @@ find_other_zone: if (secno >= MAIN_SECS(sbi)) { secno = find_first_zero_bit(free_i->free_secmap, MAIN_SECS(sbi)); - f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); + if (secno >= MAIN_SECS(sbi)) { + ret = -ENOSPC; + goto out_unlock; + } } segno = GET_SEG_FROM_SEC(sbi, secno); zoneno = GET_ZONE_FROM_SEC(sbi, secno); @@ -2703,7 +2707,13 @@ got_it: f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); __set_inuse(sbi, segno); *newseg = segno; +out_unlock: spin_unlock(&free_i->segmap_lock); + + if (ret) { + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT); + f2fs_bug_on(sbi, 1); + } } static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 9b69c50255b2..755e9a41b196 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -75,6 +75,7 @@ enum stop_cp_reason { STOP_CP_REASON_CORRUPTED_SUMMARY, STOP_CP_REASON_UPDATE_INODE, STOP_CP_REASON_FLUSH_FAIL, + STOP_CP_REASON_NO_SEGMENT, STOP_CP_REASON_MAX, }; From 7d009e048d7cfcc21d400f2aba4c8bacbdebbd47 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 22 Feb 2024 20:18:50 +0800 Subject: [PATCH 30/60] f2fs: fix to handle segment allocation failure correctly If CONFIG_F2FS_CHECK_FS is off, and for very rare corner case that we run out of free segment, we should not panic kernel, instead, let's handle such error correctly in its caller. 
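The caller-side pattern, in short (a simplified excerpt of the __allocate_data_block() hunk
below; other call sites differ in their cleanup): f2fs_allocate_data_block() now returns an
error that is propagated instead of triggering a kernel panic.

	err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr,
				&dn->data_blkaddr, &sum, seg_type, NULL);
	if (err)
		return err;	/* propagate instead of hitting f2fs_bug_on() */

	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		f2fs_invalidate_internal_cache(sbi, old_blkaddr);
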
Signed-off-by: Chao Yu Tested-by: Zhiguo Niu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 7 +++++-- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 7 ++++++- fs/f2fs/gc.c | 7 ++++++- fs/f2fs/segment.c | 46 +++++++++++++++++++++++++++++++++++++++------- 5 files changed, 57 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0c9aa3082fcf..c21b92f18463 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1416,8 +1416,11 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); old_blkaddr = dn->data_blkaddr; - f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, - &sum, seg_type, NULL); + err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr, + &dn->data_blkaddr, &sum, seg_type, NULL); + if (err) + return err; + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) f2fs_invalidate_internal_cache(sbi, old_blkaddr); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 68a5003014b7..4ce48dbae55b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3725,7 +3725,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, block_t old_addr, block_t new_addr, unsigned char version, bool recover_curseg, bool recover_newaddr); -void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 891fa359f7e0..6371bd33bf6d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2262,8 +2262,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) case F2FS_GOING_DOWN_METASYNC: /* do checkpoint only */ ret = f2fs_sync_fs(sb, 1); - if (ret) + if (ret) { + if (ret == -EIO) + ret = 0; goto out; + } f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN); break; case F2FS_GOING_DOWN_NOSYNC: @@ -2279,6 +2282,8 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) set_sbi_flag(sbi, SBI_IS_DIRTY); /* do checkpoint only */ ret = f2fs_sync_fs(sb, 1); + if (ret == -EIO) + ret = 0; goto out; default: ret = -EINVAL; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d194097c3da0..e435e1f58cd5 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1361,8 +1361,13 @@ static int move_data_block(struct inode *inode, block_t bidx, set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); /* allocate block address */ - f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, + err = f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, &sum, type, NULL); + if (err) { + f2fs_put_page(mpage, 1); + /* filesystem should shutdown, no need to recovery block */ + goto up_out; + } fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dc511630eaa5..a5339dd7a932 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -400,6 +400,9 @@ int f2fs_commit_atomic_write(struct inode *inode) */ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { + if (f2fs_cp_error(sbi)) + return; + if (time_to_inject(sbi, FAULT_CHECKPOINT)) f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT); @@ -2639,7 +2642,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, * Find a new segment from the free segments bitmap to right order * This function should be returned with success, otherwise BUG */ -static void 
get_new_segment(struct f2fs_sb_info *sbi, +static int get_new_segment(struct f2fs_sb_info *sbi, unsigned int *newseg, bool new_sec, bool pinning) { struct free_segmap_info *free_i = FREE_I(sbi); @@ -2714,6 +2717,7 @@ out_unlock: f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT); f2fs_bug_on(sbi, 1); } + return ret; } static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) @@ -2722,6 +2726,10 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) struct summary_footer *sum_footer; unsigned short seg_type = curseg->seg_type; + /* only happen when get_new_segment() fails */ + if (curseg->next_segno == NULL_SEGNO) + return; + curseg->inited = true; curseg->segno = curseg->next_segno; curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); @@ -2786,7 +2794,10 @@ static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); segno = __get_next_segno(sbi, type); - get_new_segment(sbi, &segno, new_sec, pinning); + if (get_new_segment(sbi, &segno, new_sec, pinning)) { + curseg->segno = NULL_SEGNO; + return -ENOSPC; + } if (new_sec && pinning && !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { __set_free(sbi, segno); @@ -3428,7 +3439,7 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi, get_random_u32_inclusive(1, sbi->max_fragment_hole); } -void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, +int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio) @@ -3445,6 +3456,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); + if (curseg->segno == NULL_SEGNO) + goto out_err; + if (from_gc) { f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO); se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr)); @@ -3504,6 +3518,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, change_curseg(sbi, type); stat_inc_seg_type(sbi, curseg); } + + if (curseg->segno == NULL_SEGNO) + goto out_err; } skip_new_segment: @@ -3538,8 +3555,15 @@ skip_new_segment: } mutex_unlock(&curseg->curseg_mutex); - f2fs_up_read(&SM_I(sbi)->curseg_lock); + return 0; +out_err: + *new_blkaddr = NULL_ADDR; + up_write(&sit_i->sentry_lock); + mutex_unlock(&curseg->curseg_mutex); + f2fs_up_read(&SM_I(sbi)->curseg_lock); + return -ENOSPC; + } void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, @@ -3577,8 +3601,16 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) if (keep_order) f2fs_down_read(&fio->sbi->io_order_lock); - f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, - &fio->new_blkaddr, sum, type, fio); + if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, + &fio->new_blkaddr, sum, type, fio)) { + if (fscrypt_inode_uses_fs_layer_crypto(fio->page->mapping->host)) + fscrypt_finalize_bounce_page(&fio->encrypted_page); + if (PageWriteback(fio->page)) + end_page_writeback(fio->page); + if (f2fs_in_warm_node_list(fio->sbi, fio->page)) + f2fs_del_fsync_node_entry(fio->sbi, fio->page); + goto out; + } if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr); @@ -3586,7 +3618,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) f2fs_submit_page_write(fio); 
f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); - +out: if (keep_order) f2fs_up_read(&fio->sbi->io_order_lock); } From ea59b12ac69774c08aa95cd5b6100700ea0cce97 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 19 Feb 2024 10:28:44 +0800 Subject: [PATCH 31/60] f2fs: compress: fix to check compress flag w/ .i_sem lock It needs to check compress flag w/ .i_sem lock, otherwise, compressed inode may be disabled after the check condition, it's not needed to set compress option on non-compress inode. Fixes: e1e8debec656 ("f2fs: add F2FS_IOC_SET_COMPRESS_OPTION ioctl") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6371bd33bf6d..1c044fb19c87 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4028,16 +4028,20 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg) sizeof(option))) return -EFAULT; - if (!f2fs_compressed_file(inode) || - option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || - option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || - option.algorithm >= COMPRESS_MAX) + if (option.log_cluster_size < MIN_COMPRESS_LOG_SIZE || + option.log_cluster_size > MAX_COMPRESS_LOG_SIZE || + option.algorithm >= COMPRESS_MAX) return -EINVAL; file_start_write(filp); inode_lock(inode); f2fs_down_write(&F2FS_I(inode)->i_sem); + if (!f2fs_compressed_file(inode)) { + ret = -EINVAL; + goto out; + } + if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) { ret = -EBUSY; goto out; From 8b10d3653735e117bc1954ade80d75ad7b46b801 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 22 Feb 2024 20:18:51 +0800 Subject: [PATCH 32/60] f2fs: introduce FAULT_NO_SEGMENT Use it to simulate no free segment case during block allocation. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 1 + Documentation/filesystems/f2fs.rst | 1 + fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 5 +++++ fs/f2fs/super.c | 1 + 5 files changed, 9 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 22d070c0de40..1a4d83953379 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -724,6 +724,7 @@ Description: Support configuring fault injection type, should be FAULT_LOCK_OP 0x000020000 FAULT_BLKADDR_VALIDITY 0x000040000 FAULT_BLKADDR_CONSISTENCE 0x000080000 + FAULT_NO_SEGMENT 0x000100000 =========================== =========== What: /sys/fs/f2fs//discard_io_aware_gran diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 8930d9ea8c4e..efc3493fd6f8 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -205,6 +205,7 @@ fault_type=%d Support configuring fault injection type, should be FAULT_LOCK_OP 0x000020000 FAULT_BLKADDR_VALIDITY 0x000040000 FAULT_BLKADDR_CONSISTENCE 0x000080000 + FAULT_NO_SEGMENT 0x000100000 =========================== =========== mode=%s Control block allocation mode which supports "adaptive" and "lfs". 
In "lfs" mode, there should be no random diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4ce48dbae55b..f1ec1a53afec 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -62,6 +62,7 @@ enum { FAULT_LOCK_OP, FAULT_BLKADDR_VALIDITY, FAULT_BLKADDR_CONSISTENCE, + FAULT_NO_SEGMENT, FAULT_MAX, }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a5339dd7a932..dab9544f871a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2656,6 +2656,11 @@ static int get_new_segment(struct f2fs_sb_info *sbi, spin_lock(&free_i->segmap_lock); + if (time_to_inject(sbi, FAULT_NO_SEGMENT)) { + ret = -ENOSPC; + goto out_unlock; + } + if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) { segno = find_next_zero_bit(free_i->free_segmap, GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 329f317e6f09..6190aace3d8c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -63,6 +63,7 @@ const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_LOCK_OP] = "lock_op", [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr", [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr", + [FAULT_NO_SEGMENT] = "no free segment", }; void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, From de25240756cde07c1e5294279aac632599a91a53 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 20 Feb 2024 10:55:26 -0800 Subject: [PATCH 33/60] f2fs: prevent an f2fs_gc loop during disable_checkpoint Don't get stuck in the f2fs_gc loop while disabling checkpoint. Instead, we have a time-based management. Reviewed-by: Chao Yu Reviewed-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6190aace3d8c..b36c840b85ae 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2191,6 +2191,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) .init_gc_type = FG_GC, .should_migrate_blocks = false, .err_gc_skipped = true, + .no_bg_gc = true, .nr_free_secs = 1 }; f2fs_down_write(&sbi->gc_lock); From 7af2df0f67a1469762e59be3726a803882d83f6f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 23 Feb 2024 12:32:05 -0800 Subject: [PATCH 34/60] f2fs: check number of blocks in a current section In cfd66bb715fd ("f2fs: fix deadloop in foreground GC"), we needed to check the number of blocks in a section instead of the segment. Fixes: cfd66bb715fd ("f2fs: fix deadloop in foreground GC") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index bd3594968c5f..4595f1cc0382 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -561,23 +561,22 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, unsigned int node_blocks, unsigned int dent_blocks) { - unsigned int segno, left_blocks; + unsigned segno, left_blocks; int i; - /* check current node segment */ + /* check current node sections in the worst case. */ for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) { segno = CURSEG_I(sbi, i)->segno; - left_blocks = f2fs_usable_blks_in_seg(sbi, segno) - - get_seg_entry(sbi, segno)->ckpt_valid_blocks; - + left_blocks = CAP_BLKS_PER_SEC(sbi) - + get_ckpt_valid_blocks(sbi, segno, true); if (node_blocks > left_blocks) return false; } - /* check current data segment */ + /* check current data section for dentry blocks. 
*/ segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno; - left_blocks = f2fs_usable_blks_in_seg(sbi, segno) - - get_seg_entry(sbi, segno)->ckpt_valid_blocks; + left_blocks = CAP_BLKS_PER_SEC(sbi) - + get_ckpt_valid_blocks(sbi, segno, true); if (dent_blocks > left_blocks) return false; return true; @@ -626,7 +625,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, if (free_secs > upper_secs) return false; - else if (free_secs <= lower_secs) + if (free_secs <= lower_secs) return true; return !curseg_space; } From 4d4c5938933186a7eeff80b4f77c98dd583a98ea Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 16 Feb 2024 13:23:30 -0800 Subject: [PATCH 35/60] f2fs: fix write pointers all the time Even if the roll forward recovery stopped due to any error, we have to fix the write pointers in order to mount the disk from the previous checkpoint. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 15 +++++++-------- fs/f2fs/super.c | 11 +++++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b3baec666afe..3078d5613748 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -863,7 +863,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) int ret = 0; unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; - bool fix_curseg_write_pointer = false; if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE)) f2fs_info(sbi, "recover fsync data on readonly fs"); @@ -894,8 +893,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) else f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE); skip: - fix_curseg_write_pointer = !check_only || list_empty(&inode_list); - destroy_fsync_dnodes(&inode_list, err); destroy_fsync_dnodes(&tmp_inode_list, err); @@ -913,11 +910,13 @@ skip: * and the f2fs is not read only, check and fix zoned block devices' * write pointer consistency. */ - if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) && - f2fs_sb_has_blkzoned(sbi)) { - err = f2fs_fix_curseg_write_pointer(sbi); - if (!err) - err = f2fs_check_write_pointer(sbi); + if (f2fs_sb_has_blkzoned(sbi) && !f2fs_readonly(sbi->sb)) { + int err2 = f2fs_fix_curseg_write_pointer(sbi); + + if (!err2) + err2 = f2fs_check_write_pointer(sbi); + if (err2) + err = err2; ret = err; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b36c840b85ae..32c23a0caf04 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4674,11 +4674,14 @@ reset_checkpoint: * If the f2fs is not readonly and fsync data recovery succeeds, * check zoned block devices' write pointer consistency. */ - if (!err && !f2fs_readonly(sb) && f2fs_sb_has_blkzoned(sbi)) { - err = f2fs_check_write_pointer(sbi); - if (err) - goto free_meta; + if (f2fs_sb_has_blkzoned(sbi) && !f2fs_readonly(sb)) { + int err2 = f2fs_check_write_pointer(sbi); + + if (err2) + err = err2; } + if (err) + goto free_meta; f2fs_init_inmem_curseg(sbi); From afbb8ff62b9bca638a99b935001115ef7ad5a599 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 16 Feb 2024 13:52:25 -0800 Subject: [PATCH 36/60] f2fs: print zone status in string and some log No functional change, but add some more logs. Note, it includes the spelling mistakes pointed by Colin Ian King. 
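The idea, abridged from the change below: map the numeric BLK_ZONE_COND_* values to
human-readable names once, then print the name instead of a raw hex condition code.

	static const char *f2fs_zone_status[BLK_ZONE_COND_OFFLINE + 1] = {
		[BLK_ZONE_COND_EMPTY]	= "EMPTY",
		[BLK_ZONE_COND_FULL]	= "FULL",
		/* ... one entry per zone condition ... */
	};

	f2fs_notice(sbi, "... cond[%s]", f2fs_zone_status[zone->cond]);
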
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 27 +++++++++++++++++++++------ fs/f2fs/super.c | 4 +++- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dab9544f871a..0e32ea36b0f4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4909,6 +4909,16 @@ out: } #ifdef CONFIG_BLK_DEV_ZONED +static const char *f2fs_zone_status[BLK_ZONE_COND_OFFLINE + 1] = { + [BLK_ZONE_COND_NOT_WP] = "NOT_WP", + [BLK_ZONE_COND_EMPTY] = "EMPTY", + [BLK_ZONE_COND_IMP_OPEN] = "IMPLICIT_OPEN", + [BLK_ZONE_COND_EXP_OPEN] = "EXPLICIT_OPEN", + [BLK_ZONE_COND_CLOSED] = "CLOSED", + [BLK_ZONE_COND_READONLY] = "READONLY", + [BLK_ZONE_COND_FULL] = "FULL", + [BLK_ZONE_COND_OFFLINE] = "OFFLINE", +}; static int check_zone_write_pointer(struct f2fs_sb_info *sbi, struct f2fs_dev_info *fdev, @@ -4929,14 +4939,19 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, * Skip check of zones cursegs point to, since * fix_curseg_write_pointer() checks them. */ - if (zone_segno >= MAIN_SEGS(sbi) || - IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) + if (zone_segno >= MAIN_SEGS(sbi)) return 0; /* * Get # of valid block of the zone. */ valid_block_cnt = get_valid_blocks(sbi, zone_segno, true); + if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) { + f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]", + zone_segno, valid_block_cnt, + f2fs_zone_status[zone->cond]); + return 0; + } if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) || (valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL)) @@ -4944,8 +4959,8 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, if (!valid_block_cnt) { f2fs_notice(sbi, "Zone without valid block has non-zero write " - "pointer. Reset the write pointer: cond[0x%x]", - zone->cond); + "pointer. Reset the write pointer: cond[%s]", + f2fs_zone_status[zone->cond]); ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, zone->len >> log_sectors_per_block); if (ret) @@ -4962,8 +4977,8 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, * selected for write operation until it get discarded. */ f2fs_notice(sbi, "Valid blocks are not aligned with write " - "pointer: valid block[0x%x,0x%x] cond[0x%x]", - zone_segno, valid_block_cnt, zone->cond); + "pointer: valid block[0x%x,0x%x] cond[%s]", + zone_segno, valid_block_cnt, f2fs_zone_status[zone->cond]); ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH, zone->start, zone->len, GFP_NOFS); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 32c23a0caf04..b038028cff27 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4675,8 +4675,10 @@ reset_checkpoint: * check zoned block devices' write pointer consistency. */ if (f2fs_sb_has_blkzoned(sbi) && !f2fs_readonly(sb)) { - int err2 = f2fs_check_write_pointer(sbi); + int err2; + f2fs_notice(sbi, "Checking entire write pointers"); + err2 = f2fs_check_write_pointer(sbi); if (err2) err = err2; } From 9ddabae70a67b6fd6d5c7f96dba95a7ce26e7dce Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 20 Feb 2024 13:28:57 -0800 Subject: [PATCH 37/60] f2fs: allow to mount if cap is 100 Don't block mounting the partition, if cap is 100%. 
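In other words (abridged from the hunk below): when unusable_cap_perc is 100%, any amount of
unusable space is acceptable while checkpoint is disabled, so the capacity check is skipped.

	int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
	{
		...
		/* cap == 100%: every unusable block is within the allowed cap */
		if (F2FS_OPTION(sbi).unusable_cap_perc == 100)
			return 0;
		if (unusable > F2FS_OPTION(sbi).unusable_cap)
			return -EAGAIN;
		...
	}
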
Reviewed-by: Chao Yu Reviewed-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0e32ea36b0f4..7a060219da85 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -904,6 +904,9 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable) { int ovp_hole_segs = (overprovision_segments(sbi) - reserved_segments(sbi)); + + if (F2FS_OPTION(sbi).unusable_cap_perc == 100) + return 0; if (unusable > F2FS_OPTION(sbi).unusable_cap) return -EAGAIN; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && From 7324858237829733dec9c670170df2377c5ca6e2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 25 Feb 2024 14:36:28 +0800 Subject: [PATCH 38/60] f2fs: fix to use correct segment type in f2fs_allocate_data_block() @type in f2fs_allocate_data_block() indicates log header's type, it can be CURSEG_COLD_DATA_PINNED or CURSEG_ALL_DATA_ATGC, rather than type of data/node, however IS_DATASEG()/IS_NODESEG() only accept later one, fix it. Fixes: 093749e296e2 ("f2fs: support age threshold based garbage collection") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7a060219da85..3b8aed8ca7d4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3540,12 +3540,12 @@ skip_new_segment: locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); - if (IS_DATASEG(type)) + if (IS_DATASEG(curseg->seg_type)) atomic64_inc(&sbi->allocated_data_blocks); up_write(&sit_i->sentry_lock); - if (page && IS_NODESEG(type)) { + if (page && IS_NODESEG(curseg->seg_type)) { fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); f2fs_inode_chksum_set(sbi, page); From 2fc2bcc8d39945ac9e26114280d2b6d1e624825c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Feb 2024 11:19:16 +0800 Subject: [PATCH 39/60] f2fs: fix to check return value in f2fs_insert_range() In f2fs_insert_range(), it missed to check return value of filemap_write_and_wait_range(), fix it. Meanwhile, just return error number once __exchange_data_block() fails. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1c044fb19c87..4ca6c693b33a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1679,10 +1679,12 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) } filemap_invalidate_unlock(mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + if (ret) + return ret; /* write out all moved pages, if possible */ filemap_invalidate_lock(mapping); - filemap_write_and_wait_range(mapping, offset, LLONG_MAX); + ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); filemap_invalidate_unlock(mapping); From 4b99ecd304290c4ef55666a62c89dfb2dbf0b2cd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Feb 2024 15:35:38 +0800 Subject: [PATCH 40/60] f2fs: ro: compress: fix to avoid caching unaligned extent Mapping info from dump.f2fs: i_addr[0x2d] cluster flag [0xfffffffe : 4294967294] i_addr[0x2e] [0x 10428 : 66600] i_addr[0x2f] [0x 10429 : 66601] i_addr[0x30] [0x 1042a : 66602] f2fs_io fiemap 37 1 /mnt/f2fs/disk-58390c8c.raw Previsouly, it missed to align fofs and ofs_in_node to cluster_size, result in adding incorrect read extent cache, fix it. 
Before: f2fs_update_read_extent_tree_range: dev = (253,48), ino = 5, pgofs = 37, len = 4, blkaddr = 66600, c_len = 3 After: f2fs_update_read_extent_tree_range: dev = (253,48), ino = 5, pgofs = 36, len = 4, blkaddr = 66600, c_len = 3 Fixes: 94afd6d6e525 ("f2fs: extent cache: support unaligned extent") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 10 ++++++---- fs/f2fs/f2fs.h | 6 ++++-- fs/f2fs/node.c | 20 ++++++++++++++------ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 3dc488ce882b..8892c8262141 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1817,16 +1817,18 @@ void f2fs_put_page_dic(struct page *page, bool in_task) * check whether cluster blocks are contiguous, and add extent cache entry * only if cluster blocks are logically and physically contiguous. */ -unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node) { - bool compressed = f2fs_data_blkaddr(dn) == COMPRESS_ADDR; + bool compressed = data_blkaddr(dn->inode, dn->node_page, + ofs_in_node) == COMPRESS_ADDR; int i = compressed ? 1 : 0; block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + i); + ofs_in_node + i); for (i += 1; i < F2FS_I(dn->inode)->i_cluster_size; i++) { block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + i); + ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) break; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f1ec1a53afec..db05fd02350a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4305,7 +4305,8 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, bool in_task); void f2fs_put_page_dic(struct page *page, bool in_task); -unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn); +unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, + unsigned int ofs_in_node); int f2fs_init_compress_ctx(struct compress_ctx *cc); void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); void f2fs_init_compress_info(struct f2fs_sb_info *sbi); @@ -4362,7 +4363,8 @@ static inline void f2fs_put_page_dic(struct page *page, bool in_task) { WARN_ON_ONCE(1); } -static inline unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn) { return 0; } +static inline unsigned int f2fs_cluster_blocks_are_contiguous( + struct dnode_of_data *dn, unsigned int ofs_in_node) { return 0; } static inline bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) { return false; } static inline int f2fs_init_compress_inode(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_compress_inode(struct f2fs_sb_info *sbi) { } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 51241996b9ec..b3de6d6cdb02 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -852,21 +852,29 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) && f2fs_sb_has_readonly(sbi)) { - unsigned int c_len = f2fs_cluster_blocks_are_contiguous(dn); + unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size; + unsigned int ofs_in_node = dn->ofs_in_node; + pgoff_t fofs = index; + unsigned int c_len; block_t blkaddr; + /* should align fofs and ofs_in_node to cluster_size */ + if (fofs % cluster_size) { + fofs = round_down(fofs, cluster_size); + ofs_in_node 
= round_down(ofs_in_node, cluster_size); + } + + c_len = f2fs_cluster_blocks_are_contiguous(dn, ofs_in_node); if (!c_len) goto out; - blkaddr = f2fs_data_blkaddr(dn); + blkaddr = data_blkaddr(dn->inode, dn->node_page, ofs_in_node); if (blkaddr == COMPRESS_ADDR) blkaddr = data_blkaddr(dn->inode, dn->node_page, - dn->ofs_in_node + 1); + ofs_in_node + 1); f2fs_update_read_extent_tree_range_compressed(dn->inode, - index, blkaddr, - F2FS_I(dn->inode)->i_cluster_size, - c_len); + fofs, blkaddr, cluster_size, c_len); } out: return 0; From a217f1873ab992c9f175d08d951334df173d3d54 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Feb 2024 15:35:39 +0800 Subject: [PATCH 41/60] f2fs: ro: don't start discard thread for readonly image [ 9299.893835] F2FS-fs (vdd): Allow to mount readonly mode only mount: /mnt/f2fs: WARNING: source write-protected, mounted read-only. root@qemu:/ ps -ef|grep f2fs root 94 2 0 03:46 ? 00:00:00 [kworker/u17:0-f2fs_post_read_wq] root 6282 2 0 06:21 ? 00:00:00 [f2fs_discard-253:48] There will be no deletion in readonly image, let's skip starting discard thread to save system resources. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3b8aed8ca7d4..8014ea9f7a55 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2253,6 +2253,12 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi) struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; int err = 0; + if (f2fs_sb_has_readonly(sbi)) { + f2fs_info(sbi, + "Skip to start discard thread for readonly image"); + return 0; + } + if (!f2fs_realtime_discard_enable(sbi)) return 0; From 8249aac1b05c28f3b35363b844b5b0194f838052 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Feb 2024 09:32:05 +0800 Subject: [PATCH 42/60] f2fs: fix blkofs_end correctly in f2fs_migrate_blocks() In f2fs_migrate_blocks(), when traversing blocks in last section, blkofs_end should be (start_blk + blkcnt - 1) % blk_per_sec, fix it. Signed-off-by: Chao Yu Reviewed-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c21b92f18463..0c728e82d936 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3841,13 +3841,14 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int blkofs; unsigned int blk_per_sec = BLKS_PER_SEC(sbi); + unsigned int end_blk = start_blk + blkcnt - 1; unsigned int secidx = start_blk / blk_per_sec; unsigned int end_sec; int ret = 0; if (!blkcnt) return 0; - end_sec = secidx + (blkcnt - 1) / blk_per_sec; + end_sec = end_blk / blk_per_sec; f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); @@ -3857,7 +3858,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, for (; secidx <= end_sec; secidx++) { unsigned int blkofs_end = secidx == end_sec ? - (blkcnt - 1) % blk_per_sec : blk_per_sec - 1; + end_blk % blk_per_sec : blk_per_sec - 1; f2fs_down_write(&sbi->pin_sem); From f1e7646a8cd446c2003c5f98a89880eb987dec72 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Feb 2024 09:32:06 +0800 Subject: [PATCH 43/60] f2fs: relocate f2fs_precache_extents() in f2fs_swap_activate() This patch exchangs position of f2fs_precache_extents() and filemap_fdatawrite(), so that f2fs_precache_extents() can load extent info after physical addresses of all data are fixed. 
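The resulting order in f2fs_swap_activate(), abridged from the hunk below:

	/* write dirty data first, so block addresses are finalized */
	ret = filemap_fdatawrite(inode->i_mapping);
	if (ret < 0)
		return ret;

	/* then cache the now-stable extent info */
	f2fs_precache_extents(inode);

	ret = check_swap_activate(sis, file, span);
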
Signed-off-by: Chao Yu Reviewed-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0c728e82d936..bd8674bf1d84 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -4045,12 +4045,12 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, if (!f2fs_disable_compressed_file(inode)) return -EINVAL; - f2fs_precache_extents(inode); - ret = filemap_fdatawrite(inode->i_mapping); if (ret < 0) return ret; + f2fs_precache_extents(inode); + ret = check_swap_activate(sis, file, span); if (ret < 0) return ret; From 1081b5121b27ed4824d90cbcdb1c662c503ffd09 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Feb 2024 09:32:07 +0800 Subject: [PATCH 44/60] f2fs: clean up new_curseg() Move f2fs_valid_pinned_area() check logic from new_curseg() to get_new_segment(), it can avoid calling __set_free() if it fails to find free segment in conventional zone for pinned file. Signed-off-by: Chao Yu Reviewed-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8014ea9f7a55..c88897db1e44 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2722,12 +2722,19 @@ find_other_zone: got_it: /* set it as dirty segment in free segmap */ f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); + + /* no free section in conventional zone */ + if (new_sec && pinning && + !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { + ret = -EAGAIN; + goto out_unlock; + } __set_inuse(sbi, segno); *newseg = segno; out_unlock: spin_unlock(&free_i->segmap_lock); - if (ret) { + if (ret == -ENOSPC) { f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT); f2fs_bug_on(sbi, 1); } @@ -2803,19 +2810,17 @@ static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int segno = curseg->segno; bool pinning = type == CURSEG_COLD_DATA_PINNED; + int ret; if (curseg->inited) write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); segno = __get_next_segno(sbi, type); - if (get_new_segment(sbi, &segno, new_sec, pinning)) { - curseg->segno = NULL_SEGNO; - return -ENOSPC; - } - if (new_sec && pinning && - !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { - __set_free(sbi, segno); - return -EAGAIN; + ret = get_new_segment(sbi, &segno, new_sec, pinning); + if (ret) { + if (ret == -ENOSPC) + curseg->segno = NULL_SEGNO; + return ret; } curseg->next_segno = segno; From 42a80aacb76bed85f453b10f662877ed60d37164 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Feb 2024 09:32:08 +0800 Subject: [PATCH 45/60] f2fs: fix to reset fields for unloaded curseg In f2fs_allocate_data_block(), before skip allocating new segment for DATA_PINNED log header, it needs to tag log header as unloaded one to avoid skipping logic in locate_dirty_segment() and __f2fs_save_inmem_curseg(). 
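Abridged from the hunks below, the curseg is explicitly marked unloaded before the allocation
is skipped, and the same helper is reused when cursegs are first built:

	static void reset_curseg_fields(struct curseg_info *curseg)
	{
		curseg->inited = false;
		curseg->segno = NULL_SEGNO;
		curseg->next_segno = 0;
	}

	if (type == CURSEG_COLD_DATA_PINNED &&
	    !((curseg->segno + 1) % sbi->segs_per_sec)) {
		/* tag the log header as unloaded before skipping allocation */
		reset_curseg_fields(curseg);
		goto skip_new_segment;
	}
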
Signed-off-by: Chao Yu Reviewed-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c88897db1e44..fa395801137b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3108,12 +3108,16 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int old_segno; + if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited) + goto allocate; + if (!force && curseg->inited && !curseg->next_blkoff && !get_valid_blocks(sbi, curseg->segno, new_sec) && !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) return 0; +allocate: old_segno = curseg->segno; if (new_curseg(sbi, type, true)) return -EAGAIN; @@ -3458,6 +3462,13 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi, get_random_u32_inclusive(1, sbi->max_fragment_hole); } +static void reset_curseg_fields(struct curseg_info *curseg) +{ + curseg->inited = false; + curseg->segno = NULL_SEGNO; + curseg->next_segno = 0; +} + int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, @@ -3524,8 +3535,10 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, */ if (segment_full) { if (type == CURSEG_COLD_DATA_PINNED && - !((curseg->segno + 1) % sbi->segs_per_sec)) + !((curseg->segno + 1) % sbi->segs_per_sec)) { + reset_curseg_fields(curseg); goto skip_new_segment; + } if (from_gc) { get_atssr_segment(sbi, type, se->type, @@ -4601,9 +4614,7 @@ static int build_curseg(struct f2fs_sb_info *sbi) array[i].seg_type = CURSEG_COLD_DATA; else if (i == CURSEG_ALL_DATA_ATGC) array[i].seg_type = CURSEG_COLD_DATA; - array[i].segno = NULL_SEGNO; - array[i].next_blkoff = 0; - array[i].inited = false; + reset_curseg_fields(&array[i]); } return restore_curseg_summaries(sbi); } From 36959d18c3cf09b3c12157c6950e18652067de77 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Wed, 28 Feb 2024 19:59:54 +0800 Subject: [PATCH 46/60] f2fs: fix to do sanity check in update_sit_entry If GET_SEGNO return NULL_SEGNO for some unecpected case, update_sit_entry will access invalid memory address, cause system crash. It is better to do sanity check about GET_SEGNO just like update_segment_mtime & locate_dirty_segment. Also remove some redundant judgment code. Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fa395801137b..0d21ce615cec 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2413,6 +2413,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) #endif segno = GET_SEGNO(sbi, blkaddr); + if (segno == NULL_SEGNO) + return; se = get_seg_entry(sbi, segno); new_vblocks = se->valid_blocks + del; @@ -3526,8 +3528,7 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, * since SSR needs latest valid block information. 
*/ update_sit_entry(sbi, *new_blkaddr, 1); - if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) - update_sit_entry(sbi, old_blkaddr, -1); + update_sit_entry(sbi, old_blkaddr, -1); /* * If the current segment is full, flush it out and replace it with a From 28f66cc654039edff3e34200c3357d92a7c4d272 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Fri, 1 Mar 2024 16:25:54 +0800 Subject: [PATCH 47/60] f2fs: fix to check return value __allocate_new_segment __allocate_new_segment may return error when get_new_segment fails, so its caller should check its return value. Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 7 +++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index db05fd02350a..433101223e7a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3705,7 +3705,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi); -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); +int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 3078d5613748..c381f0af33f3 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -851,7 +851,7 @@ next: f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); } if (!err) - f2fs_allocate_new_segments(sbi); + err = f2fs_allocate_new_segments(sbi); return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0d21ce615cec..e284a3dc97c4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3163,16 +3163,19 @@ retry: return err; } -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) +int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { int i; + int err = 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) - __allocate_new_segment(sbi, i, false, false); + err += __allocate_new_segment(sbi, i, false, false); up_write(&SIT_I(sbi)->sentry_lock); f2fs_up_read(&SM_I(sbi)->curseg_lock); + + return err; } bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, From 22af1b8c31cbf7daf235a4fcb975089ad8c07fbe Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Fri, 1 Mar 2024 16:25:55 +0800 Subject: [PATCH 48/60] f2fs: fix to check return value of f2fs_gc_range f2fs_gc_range may return error, so its caller f2fs_allocate_pinning_section should determine whether to do retry based on ist return value. Also just do f2fs_gc_range when f2fs_allocate_new_section return -EAGAIN, and check cp error case in f2fs_gc_range. 
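The retry logic then looks roughly like this (abridged from the hunk below): only -EAGAIN,
i.e. "no free section in the conventional zone yet", triggers a one-shot GC and retry, and a
GC failure (for example -EIO after a checkpoint error) ends the loop.

retry:
	f2fs_lock_op(sbi);
	err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
	f2fs_unlock_op(sbi);

	if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) {
		f2fs_down_write(&sbi->gc_lock);
		err = f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
		f2fs_up_write(&sbi->gc_lock);

		gc_required = false;
		if (!err)
			goto retry;
	}
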
Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 3 +++ fs/f2fs/segment.c | 13 ++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e435e1f58cd5..c60b7472475b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1986,6 +1986,9 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi, unsigned int segno; unsigned int gc_secs = dry_run_sections; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) { struct gc_inode_list gc_list = { .ilist = LIST_HEAD_INIT(gc_list.ilist), diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e284a3dc97c4..5abb044a9cdf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3109,6 +3109,7 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int old_segno; + int err = 0; if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited) goto allocate; @@ -3121,8 +3122,9 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, allocate: old_segno = curseg->segno; - if (new_curseg(sbi, type, true)) - return -EAGAIN; + err = new_curseg(sbi, type, true); + if (err) + return err; stat_inc_seg_type(sbi, curseg); locate_dirty_segment(sbi, old_segno); return 0; @@ -3151,13 +3153,14 @@ retry: err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); f2fs_unlock_op(sbi); - if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) { + if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) { f2fs_down_write(&sbi->gc_lock); - f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1); + err = f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1); f2fs_up_write(&sbi->gc_lock); gc_required = false; - goto retry; + if (!err) + goto retry; } return err; From 45809cd3bdac8743f08e42a32121e035aee69d88 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 4 Mar 2024 11:28:55 +0800 Subject: [PATCH 49/60] f2fs: introduce SEGS_TO_BLKS/BLKS_TO_SEGS for cleanup Just cleanup, no functional change. 
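Side note: the conversion is mechanical, since the new helpers are just named forms of the open-coded shifts they replace. A small user-space check, assuming an example log_blocks_per_seg of 9 (512 blocks per 2MB segment with 4KB blocks):

#include <assert.h>
#include <stdio.h>

struct sbi_model { unsigned int log_blocks_per_seg; };

#define SEGS_TO_BLKS(sbi, segs)         ((segs) << (sbi)->log_blocks_per_seg)
#define BLKS_TO_SEGS(sbi, blks)         ((blks) >> (sbi)->log_blocks_per_seg)

int main(void)
{
        struct sbi_model s = { .log_blocks_per_seg = 9 };

        /* identical to the open-coded expressions this patch removes */
        assert(SEGS_TO_BLKS(&s, 8u) == (8u << s.log_blocks_per_seg));
        assert(BLKS_TO_SEGS(&s, 4096u) == (4096u >> s.log_blocks_per_seg));

        printf("8 segs = %u blks, 4096 blks = %u segs\n",
               SEGS_TO_BLKS(&s, 8u), BLKS_TO_SEGS(&s, 4096u));
        return 0;
}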
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 7 +++---- fs/f2fs/f2fs.h | 14 ++++++++------ fs/f2fs/gc.c | 10 +++++----- fs/f2fs/gc.h | 4 ++-- fs/f2fs/segment.c | 12 ++++++------ fs/f2fs/segment.h | 15 +++++++-------- fs/f2fs/super.c | 16 ++++++++-------- fs/f2fs/sysfs.c | 4 ++-- 8 files changed, 41 insertions(+), 41 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0d02224b99b7..8b0e1e71b667 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -135,7 +135,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->cur_ckpt_time = sbi->cprc_info.cur_time; si->peak_ckpt_time = sbi->cprc_info.peak_time; spin_unlock(&sbi->cprc_info.stat_lock); - si->total_count = (int)sbi->user_block_count / BLKS_PER_SEG(sbi); + si->total_count = BLKS_TO_SEGS(sbi, (int)sbi->user_block_count); si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); si->valid_count = valid_user_blocks(sbi); @@ -176,11 +176,10 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; si->io_skip_bggc = sbi->io_skip_bggc; si->other_skip_bggc = sbi->other_skip_bggc; - si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) + si->util_free = (int)(BLKS_TO_SEGS(sbi, free_user_blocks(sbi))) * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) / 2; - si->util_valid = (int)(written_block_count(sbi) >> - sbi->log_blocks_per_seg) + si->util_valid = (int)(BLKS_TO_SEGS(sbi, written_block_count(sbi))) * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) / 2; si->util_invalid = 50 - si->util_free - si->util_valid; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 433101223e7a..4836e7cb0efe 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1815,12 +1815,14 @@ struct f2fs_sb_info { }; /* Definitions to access f2fs_sb_info */ -#define BLKS_PER_SEG(sbi) \ - ((sbi)->blocks_per_seg) -#define BLKS_PER_SEC(sbi) \ - ((sbi)->segs_per_sec << (sbi)->log_blocks_per_seg) -#define SEGS_PER_SEC(sbi) \ - ((sbi)->segs_per_sec) +#define SEGS_TO_BLKS(sbi, segs) \ + ((segs) << (sbi)->log_blocks_per_seg) +#define BLKS_TO_SEGS(sbi, blks) \ + ((blks) >> (sbi)->log_blocks_per_seg) + +#define BLKS_PER_SEG(sbi) ((sbi)->blocks_per_seg) +#define BLKS_PER_SEC(sbi) (SEGS_TO_BLKS(sbi, (sbi)->segs_per_sec)) +#define SEGS_PER_SEC(sbi) ((sbi)->segs_per_sec) __printf(3, 4) void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, const char *fmt, ...); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c60b7472475b..96ba3585f1cf 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -301,7 +301,7 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, /* LFS */ if (p->gc_mode == GC_GREEDY) - return 2 * BLKS_PER_SEG(sbi) * p->ofs_unit; + return SEGS_TO_BLKS(sbi, 2 * p->ofs_unit); else if (p->gc_mode == GC_CB) return UINT_MAX; else if (p->gc_mode == GC_AT) @@ -348,7 +348,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) mtime = div_u64(mtime, usable_segs_per_sec); vblocks = div_u64(vblocks, usable_segs_per_sec); - u = (vblocks * 100) >> sbi->log_blocks_per_seg; + u = BLKS_TO_SEGS(sbi, vblocks * 100); /* Handle if the system time has changed by the user */ if (mtime < sit_i->min_mtime) @@ -2081,7 +2081,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) raw_sb->segment_count = cpu_to_le32(segment_count + segs); raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); raw_sb->block_count = cpu_to_le64(block_count + - (long long)(segs << 
sbi->log_blocks_per_seg)); + (long long)SEGS_TO_BLKS(sbi, segs)); if (f2fs_is_multi_device(sbi)) { int last_dev = sbi->s_ndevs - 1; int dev_segs = @@ -2097,7 +2097,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) { int segs = secs * SEGS_PER_SEC(sbi); - long long blks = (long long)segs << sbi->log_blocks_per_seg; + long long blks = SEGS_TO_BLKS(sbi, segs); long long user_block_count = le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); @@ -2139,7 +2139,7 @@ int f2fs_resize_fs(struct file *filp, __u64 block_count) int last_dev = sbi->s_ndevs - 1; __u64 last_segs = FDEV(last_dev).total_segments; - if (block_count + (last_segs << sbi->log_blocks_per_seg) <= + if (block_count + SEGS_TO_BLKS(sbi, last_segs) <= old_block_count) return -EINVAL; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 28a00942802c..9c0d06c4d19a 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -96,7 +96,7 @@ static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi) if (f2fs_sb_has_blkzoned(sbi)) return free_segs_blk_count_zoned(sbi); - return free_segments(sbi) << sbi->log_blocks_per_seg; + return SEGS_TO_BLKS(sbi, free_segments(sbi)); } static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) @@ -104,7 +104,7 @@ static inline block_t free_user_blocks(struct f2fs_sb_info *sbi) block_t free_blks, ovp_blks; free_blks = free_segs_blk_count(sbi); - ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg; + ovp_blks = SEGS_TO_BLKS(sbi, overprovision_segments(sbi)); if (free_blks < ovp_blks) return 0; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5abb044a9cdf..4ff3b2d14ddf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -451,8 +451,8 @@ static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi) unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES); unsigned int meta = get_pages(sbi, F2FS_DIRTY_META); unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA); - unsigned int threshold = (factor * DEFAULT_DIRTY_THRESHOLD) << - sbi->log_blocks_per_seg; + unsigned int threshold = + SEGS_TO_BLKS(sbi, (factor * DEFAULT_DIRTY_THRESHOLD)); unsigned int global_threshold = threshold * 3 / 2; if (dents >= threshold || qdata >= threshold || @@ -875,7 +875,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi) { int ovp_hole_segs = (overprovision_segments(sbi) - reserved_segments(sbi)); - block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg; + block_t ovp_holes = SEGS_TO_BLKS(sbi, ovp_hole_segs); struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); block_t holes[2] = {0, 0}; /* DATA and NODE */ block_t unusable; @@ -2188,7 +2188,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, if (!f2fs_sb_has_blkzoned(sbi) && (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) { f2fs_issue_discard(sbi, START_BLOCK(sbi, start), - (end - start) << sbi->log_blocks_per_seg); + SEGS_TO_BLKS(sbi, end - start)); continue; } next: @@ -2305,7 +2305,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&dcc->queued_discard, 0); atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; - dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; + dcc->max_discards = SEGS_TO_BLKS(sbi, MAIN_SEGS(sbi)); dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST; dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME; dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME; @@ -4548,7 +4548,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) #endif sit_i->sit_base_addr = 
le32_to_cpu(raw_super->sit_blkaddr); - sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg; + sit_i->sit_blocks = SEGS_TO_BLKS(sbi, sit_segs); sit_i->written_valid_blocks = 0; sit_i->bitmap_size = sit_bitmap_size; sit_i->dirty_sentries = 0; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 4595f1cc0382..e1c0f418aa11 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -77,21 +77,21 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, #define TOTAL_SEGS(sbi) \ (SM_I(sbi) ? SM_I(sbi)->segment_count : \ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count)) -#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) +#define TOTAL_BLKS(sbi) (SEGS_TO_BLKS(sbi, TOTAL_SEGS(sbi))) #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) #define SEGMENT_SIZE(sbi) (1ULL << ((sbi)->log_blocksize + \ (sbi)->log_blocks_per_seg)) #define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \ - (GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg)) + (SEGS_TO_BLKS(sbi, GET_R2L_SEGNO(FREE_I(sbi), segno)))) #define NEXT_FREE_BLKADDR(sbi, curseg) \ (START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff) #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi)) #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg) + (BLKS_TO_SEGS(sbi, GET_SEGOFF_FROM_SEG0(sbi, blk_addr))) #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (BLKS_PER_SEG(sbi) - 1)) @@ -100,11 +100,10 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define CAP_BLKS_PER_SEC(sbi) \ - (SEGS_PER_SEC(sbi) * BLKS_PER_SEG(sbi) - \ - (sbi)->unusable_blocks_per_sec) + (BLKS_PER_SEC(sbi) - (sbi)->unusable_blocks_per_sec) #define CAP_SEGS_PER_SEC(sbi) \ - (SEGS_PER_SEC(sbi) - ((sbi)->unusable_blocks_per_sec >> \ - (sbi)->log_blocks_per_seg)) + (SEGS_PER_SEC(sbi) - \ + BLKS_TO_SEGS(sbi, (sbi)->unusable_blocks_per_sec)) #define GET_SEC_FROM_SEG(sbi, segno) \ (((segno) == -1) ? 
-1 : (segno) / SEGS_PER_SEC(sbi)) #define GET_SEG_FROM_SEC(sbi, secno) \ @@ -904,7 +903,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) if (type == DATA) return BLKS_PER_SEG(sbi); else if (type == NODE) - return 8 * BLKS_PER_SEG(sbi); + return SEGS_TO_BLKS(sbi, 8); else if (type == META) return 8 * BIO_MAX_VECS; else diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b038028cff27..78a76583a4aa 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3756,9 +3756,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec); sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone); sbi->total_sections = le32_to_cpu(raw_super->section_count); - sbi->total_node_count = - ((le32_to_cpu(raw_super->segment_count_nat) / 2) * - NAT_ENTRY_PER_BLOCK) << sbi->log_blocks_per_seg; + sbi->total_node_count = SEGS_TO_BLKS(sbi, + ((le32_to_cpu(raw_super->segment_count_nat) / 2) * + NAT_ENTRY_PER_BLOCK)); F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino); F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino); F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino); @@ -4200,14 +4200,14 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) if (i == 0) { FDEV(i).start_blk = 0; FDEV(i).end_blk = FDEV(i).start_blk + - (FDEV(i).total_segments << - sbi->log_blocks_per_seg) - 1 + - le32_to_cpu(raw_super->segment0_blkaddr); + SEGS_TO_BLKS(sbi, + FDEV(i).total_segments) - 1 + + le32_to_cpu(raw_super->segment0_blkaddr); } else { FDEV(i).start_blk = FDEV(i - 1).end_blk + 1; FDEV(i).end_blk = FDEV(i).start_blk + - (FDEV(i).total_segments << - sbi->log_blocks_per_seg) - 1; + SEGS_TO_BLKS(sbi, + FDEV(i).total_segments) - 1; FDEV(i).bdev_handle = bdev_open_by_path( FDEV(i).path, mode, sbi->sb, NULL); } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 906d2af2d849..10f308b3128f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -493,8 +493,8 @@ out: spin_lock(&sbi->stat_lock); if (t > (unsigned long)(sbi->user_block_count - F2FS_OPTION(sbi).root_reserved_blocks - - (SM_I(sbi)->additional_reserved_segments << - sbi->log_blocks_per_seg))) { + SEGS_TO_BLKS(sbi, + SM_I(sbi)->additional_reserved_segments))) { spin_unlock(&sbi->stat_lock); return -EINVAL; } From f238eff95f485b9e8a1b1a8d4602e8e9d0ae331d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 4 Mar 2024 15:16:56 -0800 Subject: [PATCH 50/60] f2fs: add a proc entry show disk layout This patch adds the disk map of block address ranges configured by multiple partitions. 
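The new entry lives under the existing per-device proc directory, so it can be read like any other f2fs proc file. A trivial reader is sketched below; the device name dm-0 is only an example and must be replaced with the block device backing the f2fs mount.

#include <stdio.h>

int main(void)
{
        /* example path, substitute your device name */
        FILE *fp = fopen("/proc/fs/f2fs/dm-0/disk_map", "r");
        char line[256];

        if (!fp) {
                perror("disk_map");
                return 1;
        }
        while (fgets(line, sizeof(line), fp))
                fputs(line, stdout);
        fclose(fp);
        return 0;
}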
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 10f308b3128f..a568ce96cf56 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1492,6 +1492,50 @@ static int __maybe_unused discard_plist_seq_show(struct seq_file *seq, return 0; } +static int __maybe_unused disk_map_seq_show(struct seq_file *seq, + void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i; + + seq_printf(seq, "Address Layout : %5luB Block address (# of Segments)\n", + F2FS_BLKSIZE); + seq_printf(seq, " SB : %12s\n", "0/1024B"); + seq_printf(seq, " seg0_blkaddr : 0x%010x\n", SEG0_BLKADDR(sbi)); + seq_printf(seq, " Checkpoint : 0x%010x (%10d)\n", + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr), 2); + seq_printf(seq, " SIT : 0x%010x (%10d)\n", + SIT_I(sbi)->sit_base_addr, + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_sit)); + seq_printf(seq, " NAT : 0x%010x (%10d)\n", + NM_I(sbi)->nat_blkaddr, + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_nat)); + seq_printf(seq, " SSA : 0x%010x (%10d)\n", + SM_I(sbi)->ssa_blkaddr, + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_ssa)); + seq_printf(seq, " Main : 0x%010x (%10d)\n", + SM_I(sbi)->main_blkaddr, + le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main)); + seq_printf(seq, " # of Sections : %12d\n", + le32_to_cpu(F2FS_RAW_SUPER(sbi)->section_count)); + seq_printf(seq, " Segs/Sections : %12d\n", + SEGS_PER_SEC(sbi)); + seq_printf(seq, " Section size : %12d MB\n", + SEGS_PER_SEC(sbi) << 1); + + if (!f2fs_is_multi_device(sbi)) + return 0; + + seq_puts(seq, "\nDisk Map for multi devices:\n"); + for (i = 0; i < sbi->s_ndevs; i++) + seq_printf(seq, "Disk:%2d (zoned=%d): 0x%010x - 0x%010x on %s\n", + i, bdev_is_zoned(FDEV(i).bdev), + FDEV(i).start_blk, FDEV(i).end_blk, + FDEV(i).path); + return 0; +} + int __init f2fs_init_sysfs(void) { int ret; @@ -1573,6 +1617,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) victim_bits_seq_show, sb); proc_create_single_data("discard_plist_info", 0444, sbi->s_proc, discard_plist_seq_show, sb); + proc_create_single_data("disk_map", 0444, sbi->s_proc, + disk_map_seq_show, sb); return 0; put_feature_list_kobj: kobject_put(&sbi->s_feature_list_kobj); From b7d797d241c154d73ec5523f87f3b06d4f299da1 Mon Sep 17 00:00:00 2001 From: Xiuhong Wang Date: Wed, 6 Mar 2024 11:47:45 +0800 Subject: [PATCH 51/60] f2fs: compress: relocate some judgments in f2fs_reserve_compress_blocks The following f2fs_io test will get a "0" result instead of -EINVAL, unisoc # ./f2fs_io compress file unisoc # ./f2fs_io reserve_cblocks file 0 it's not reasonable, so the judgement of atomic_read(&F2FS_I(inode)->i_compr_blocks) should be placed after the judgement of is_inode_flag_set(inode, FI_COMPRESS_RELEASED). 
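To make the intended ordering concrete, here is a stand-alone user-space model (stand-in fields, not the kernel structures): a file that was never released must fail with -EINVAL, and only after that check may a non-zero i_compr_blocks count end the ioctl early with success.

#include <errno.h>
#include <stdio.h>

struct inode_model {
        int compress_released;          /* models FI_COMPRESS_RELEASED */
        int i_compr_blocks;             /* models F2FS_I(inode)->i_compr_blocks */
};

static int reserve_compress_blocks_model(struct inode_model *inode)
{
        if (!inode->compress_released)
                return -EINVAL;         /* checked first, as after the patch */
        if (inode->i_compr_blocks)
                return 0;               /* nothing left to reserve */
        /* ... the real reservation work would run here ... */
        return 0;
}

int main(void)
{
        struct inode_model compressed_only = { 0, 924 };
        struct inode_model released = { 1, 0 };

        printf("compressed, never released: %d (expect %d)\n",
               reserve_compress_blocks_model(&compressed_only), -EINVAL);
        printf("released:                   %d\n",
               reserve_compress_blocks_model(&released));
        return 0;
}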
Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS") Signed-off-by: Xiuhong Wang Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4ca6c693b33a..74c5e48fce22 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3720,9 +3720,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (ret) return ret; - if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) - goto out; - f2fs_balance_fs(sbi, true); inode_lock(inode); @@ -3732,6 +3729,9 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) goto unlock_inode; } + if (atomic_read(&F2FS_I(inode)->i_compr_blocks)) + goto unlock_inode; + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); @@ -3778,7 +3778,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) } unlock_inode: inode_unlock(inode); -out: mnt_drop_write_file(filp); if (ret >= 0) { From 2f6d721e14b69d6e1251f69fa238b48e8374e25f Mon Sep 17 00:00:00 2001 From: Xiuhong Wang Date: Wed, 6 Mar 2024 11:47:46 +0800 Subject: [PATCH 52/60] f2fs: compress: fix reserve_cblocks counting error when out of space When a file only needs one direct_node, performing the following operations will cause the file to be unrepairable: unisoc # ./f2fs_io compress test.apk unisoc #df -h | grep dm-48 /dev/block/dm-48 112G 112G 1.2M 100% /data unisoc # ./f2fs_io release_cblocks test.apk 924 unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 4.8M 100% /data unisoc # dd if=/dev/random of=file4 bs=1M count=3 3145728 bytes (3.0 M) copied, 0.025 s, 120 M/s unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 1.8M 100% /data unisoc # ./f2fs_io reserve_cblocks test.apk F2FS_IOC_RESERVE_COMPRESS_BLOCKS failed: No space left on device adb reboot unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 11M 100% /data unisoc # ./f2fs_io reserve_cblocks test.apk 0 This is because the file has only one direct_node. After returning to -ENOSPC, reserved_blocks += ret will not be executed. As a result, the reserved_blocks at this time is still 0, which is not the real number of reserved blocks. Therefore, fsck cannot be set to repair the file. After this patch, the fsck flag will be set to fix this problem. 
unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 1.8M 100% /data unisoc # ./f2fs_io reserve_cblocks test.apk F2FS_IOC_RESERVE_COMPRESS_BLOCKS failed: No space left on device adb reboot then fsck will be executed unisoc # df -h | grep dm-48 /dev/block/dm-48 112G 112G 11M 100% /data unisoc # ./f2fs_io reserve_cblocks test.apk 924 Fixes: c75488fb4d82 ("f2fs: introduce F2FS_IOC_RESERVE_COMPRESS_BLOCKS") Signed-off-by: Xiuhong Wang Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 74c5e48fce22..dc9c6bac678d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3624,10 +3624,10 @@ out: return ret; } -static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) +static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, + unsigned int *reserved_blocks) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - unsigned int reserved_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; block_t blkaddr; int i; @@ -3691,12 +3691,12 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count) f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); - reserved_blocks += reserved; + *reserved_blocks += reserved; next: count -= cluster_size; } - return reserved_blocks; + return 0; } static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) @@ -3757,7 +3757,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); count = round_up(count, F2FS_I(inode)->i_cluster_size); - ret = reserve_compress_blocks(&dn, count); + ret = reserve_compress_blocks(&dn, count, &reserved_blocks); f2fs_put_dnode(&dn); @@ -3765,13 +3765,12 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) break; page_idx += count; - reserved_blocks += ret; } filemap_invalidate_unlock(inode->i_mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (ret >= 0) { + if (!ret) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); inode_set_ctime_current(inode); f2fs_mark_inode_dirty_sync(inode, true); @@ -3780,7 +3779,7 @@ unlock_inode: inode_unlock(inode); mnt_drop_write_file(filp); - if (ret >= 0) { + if (!ret) { ret = put_user(reserved_blocks, (u64 __user *)arg); } else if (reserved_blocks && atomic_read(&F2FS_I(inode)->i_compr_blocks)) { From 9f0c4a46be1fe9b97dbe66d49204c1371e3ece65 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 Mar 2024 09:08:34 +0800 Subject: [PATCH 53/60] f2fs: fix to truncate meta inode pages forcely Below race case can cause data corruption: Thread A GC thread - gc_data_segment - ra_data_block - locked meta_inode page - f2fs_inplace_write_data - invalidate_mapping_pages : fail to invalidate meta_inode page due to lock failure or dirty|writeback status - f2fs_submit_page_bio : write last dirty data to old blkaddr - move_data_block - load old data from meta_inode page - f2fs_submit_page_write : write old data to new blkaddr Because invalidate_mapping_pages() will skip invalidating page which has unclear status including locked, dirty, writeback and so on, so we need to use truncate_inode_pages_range() instead of invalidate_mapping_pages() to make sure meta_inode page will be dropped. 
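The byte-range arithmetic used by the new helper can be sanity-checked in user space. The macros below are adapted from this patch, and F2FS_BLKSIZE_BITS is assumed to be 12 (4KB blocks): truncating blocks [blkaddr, blkaddr + cnt) needs an inclusive byte range ending on the last byte of the last block.

#include <stdio.h>

#define F2FS_BLKSIZE_BITS       12      /* assumption: 4KB blocks */
#define F2FS_BLK_TO_BYTES(blk)  ((long long)(blk) << F2FS_BLKSIZE_BITS)
#define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES((blk) + 1) - 1)

int main(void)
{
        unsigned int blkaddr = 100, cnt = 3;    /* example values */

        printf("truncate byte range: [%lld, %lld]\n",
               F2FS_BLK_TO_BYTES(blkaddr),
               F2FS_BLK_END_BYTES(blkaddr + cnt - 1));
        return 0;
}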
Fixes: 6aa58d8ad20a ("f2fs: readahead encrypted block during GC") Fixes: e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +++-- fs/f2fs/f2fs.h | 28 +++++++++++++++++++++++++++- fs/f2fs/segment.c | 5 ++--- include/linux/f2fs_fs.h | 1 + 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a09a9609e228..55b7d2cf030f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1598,8 +1598,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) */ if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) || f2fs_sb_has_compression(sbi)) - invalidate_mapping_pages(META_MAPPING(sbi), - MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); + f2fs_bug_on(sbi, + invalidate_inode_pages2_range(META_MAPPING(sbi), + MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1)); f2fs_release_ino_entry(sbi, false); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4836e7cb0efe..9814e5981a6a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4655,10 +4655,36 @@ static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi) return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb); } +static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int cnt) +{ + bool need_submit = false; + int i = 0; + + do { + struct page *page; + + page = find_get_page(META_MAPPING(sbi), blkaddr + i); + if (page) { + if (PageWriteback(page)) + need_submit = true; + f2fs_put_page(page, 0); + } + } while (++i < cnt && !need_submit); + + if (need_submit) + f2fs_submit_merged_write_cond(sbi, sbi->meta_inode, + NULL, 0, DATA); + + truncate_inode_pages_range(META_MAPPING(sbi), + F2FS_BLK_TO_BYTES((loff_t)blkaddr), + F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1))); +} + static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi, block_t blkaddr) { - invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr); + f2fs_truncate_meta_inode_pages(sbi, blkaddr, 1); f2fs_invalidate_compress_page(sbi, blkaddr); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4ff3b2d14ddf..20af48d7f784 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3741,8 +3741,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) } if (fio->post_read) - invalidate_mapping_pages(META_MAPPING(sbi), - fio->new_blkaddr, fio->new_blkaddr); + f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1); stat_inc_inplace_blocks(fio->sbi); @@ -3932,7 +3931,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, for (i = 0; i < len; i++) f2fs_wait_on_block_writeback(inode, blkaddr + i); - invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1); + f2fs_truncate_meta_inode_pages(sbi, blkaddr, len); } static int read_compacted_summaries(struct f2fs_sb_info *sbi) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 755e9a41b196..a357287eac1e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -27,6 +27,7 @@ #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) +#define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1) /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 From 11bec96afbfbc4679863db55258de440d786821e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 Mar 2024 11:50:57 +0800 Subject: [PATCH 54/60] f2fs: zone: fix to remove pow2 check condition for 
zoned block device Commit 2e2c6e9b72ce ("f2fs: remove power-of-two limitation of zoned device") missed to remove pow2 check condition in init_blkz_info(), fix it. Fixes: 2e2c6e9b72ce ("f2fs: remove power-of-two limitation of zoned device") Signed-off-by: Feng Song Signed-off-by: Yongpeng Yang Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 78a76583a4aa..c4c4c957a3c9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3868,11 +3868,6 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) return 0; zone_sectors = bdev_zone_sectors(bdev); - if (!is_power_of_2(zone_sectors)) { - f2fs_err(sbi, "F2FS does not support non power of 2 zone sizes\n"); - return -EINVAL; - } - if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != SECTOR_TO_BLOCK(zone_sectors)) return -EINVAL; From 31f85ccc84b82cc7eb122af01f5017fbe1e29289 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Fri, 8 Mar 2024 14:25:25 +0800 Subject: [PATCH 55/60] f2fs: unify the error handling of f2fs_is_valid_blkaddr There are some cases of f2fs_is_valid_blkaddr not handled as ERROR_INVALID_BLKADDR,so unify the error handling about all of f2fs_is_valid_blkaddr. Do f2fs_handle_error in __f2fs_is_valid_blkaddr for cleanup. Signed-off-by: Zhiguo Niu Signed-off-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 40 ++++++++++++++++++++++------------------ fs/f2fs/data.c | 22 +++------------------- fs/f2fs/extent_cache.c | 5 +---- fs/f2fs/file.c | 16 +++------------- fs/f2fs/gc.c | 2 -- fs/f2fs/recovery.c | 4 ---- fs/f2fs/segment.c | 2 -- 7 files changed, 29 insertions(+), 62 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 55b7d2cf030f..eac698b8dd38 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -154,19 +154,20 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, if (unlikely(f2fs_cp_error(sbi))) return exist; - if (exist && type == DATA_GENERIC_ENHANCE_UPDATE) { - f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", - blkaddr, exist); - set_sbi_flag(sbi, SBI_NEED_FSCK); - return exist; - } + if ((exist && type == DATA_GENERIC_ENHANCE_UPDATE) || + (!exist && type == DATA_GENERIC_ENHANCE)) + goto out_err; + if (!exist && type != DATA_GENERIC_ENHANCE_UPDATE) + goto out_handle; + return exist; - if (!exist && type == DATA_GENERIC_ENHANCE) { - f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", - blkaddr, exist); - set_sbi_flag(sbi, SBI_NEED_FSCK); - dump_stack(); - } +out_err: + f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", + blkaddr, exist); + set_sbi_flag(sbi, SBI_NEED_FSCK); + dump_stack(); +out_handle: + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); return exist; } @@ -178,22 +179,22 @@ static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, break; case META_SIT: if (unlikely(blkaddr >= SIT_BLK_CNT(sbi))) - return false; + goto err; break; case META_SSA: if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) || blkaddr < SM_I(sbi)->ssa_blkaddr)) - return false; + goto err; break; case META_CP: if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr || blkaddr < __start_cp_addr(sbi))) - return false; + goto err; break; case META_POR: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) - return false; + goto err; break; case DATA_GENERIC: case DATA_GENERIC_ENHANCE: @@ -210,7 +211,7 @@ static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, blkaddr); set_sbi_flag(sbi, 
SBI_NEED_FSCK); dump_stack(); - return false; + goto err; } else { return __is_bitmap_valid(sbi, blkaddr, type); } @@ -218,13 +219,16 @@ static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, case META_GENERIC: if (unlikely(blkaddr < SEG0_BLKADDR(sbi) || blkaddr >= MAIN_BLKADDR(sbi))) - return false; + goto err; break; default: BUG(); } return true; +err: + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + return false; } bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index bd8674bf1d84..d9494b5fc7c1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -690,10 +690,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, fio->is_por ? META_POR : (__is_meta_io(fio) ? - META_GENERIC : DATA_GENERIC_ENHANCE))) { - f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); + META_GENERIC : DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - } trace_f2fs_submit_page_bio(page, fio); @@ -888,10 +886,8 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) fio->encrypted_page : fio->page; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, - __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) { - f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); + __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) return -EFSCORRUPTED; - } trace_f2fs_submit_page_bio(page, fio); @@ -1219,8 +1215,6 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ)) { err = -EFSCORRUPTED; - f2fs_handle_error(F2FS_I_SB(inode), - ERROR_INVALID_BLKADDR); goto put_err; } goto got_it; @@ -1246,8 +1240,6 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, dn.data_blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; - f2fs_handle_error(F2FS_I_SB(inode), - ERROR_INVALID_BLKADDR); goto put_err; } got_it: @@ -1578,7 +1570,6 @@ next_block: if (!is_hole && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto sync_out; } @@ -2102,8 +2093,6 @@ got_it: if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, DATA_GENERIC_ENHANCE_READ)) { ret = -EFSCORRUPTED; - f2fs_handle_error(F2FS_I_SB(inode), - ERROR_INVALID_BLKADDR); goto out; } } else { @@ -2641,11 +2630,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) f2fs_lookup_read_extent_cache_block(inode, page->index, &fio->old_blkaddr)) { if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, - DATA_GENERIC_ENHANCE)) { - f2fs_handle_error(fio->sbi, - ERROR_INVALID_BLKADDR); + DATA_GENERIC_ENHANCE)) return -EFSCORRUPTED; - } ipu_force = true; fio->need_lock = LOCK_DONE; @@ -2673,7 +2659,6 @@ got_it: !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) { err = -EFSCORRUPTED; - f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); goto out_writepage; } @@ -3640,7 +3625,6 @@ repeat: if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto fail; } err = f2fs_submit_page_read(use_cow ? 
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index ad8dfac73bd4..48048fa36427 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -43,7 +43,6 @@ bool sanity_check_extent_cache(struct inode *inode) if (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE) || !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, DATA_GENERIC_ENHANCE)) { - set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", __func__, inode->i_ino, ei->blk, ei->fofs, ei->len); @@ -856,10 +855,8 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei, goto out; if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { - f2fs_bug_on(sbi, 1); + !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) return -EINVAL; - } out: /* * init block age with zero, this can happen when the block age extent diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index dc9c6bac678d..4dfe38e73130 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -608,10 +608,8 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE)) continue; if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr, - DATA_GENERIC_ENHANCE)) { - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + DATA_GENERIC_ENHANCE)) continue; - } if (compressed_cluster) valid_blocks++; } @@ -1209,7 +1207,6 @@ next_dnode: !f2fs_is_valid_blkaddr(sbi, *blkaddr, DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); return -EFSCORRUPTED; } @@ -1495,7 +1492,6 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr, DATA_GENERIC_ENHANCE)) { ret = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); break; } @@ -3474,10 +3470,8 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) if (!__is_valid_data_blkaddr(blkaddr)) continue; if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, - DATA_GENERIC_ENHANCE))) { - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - } } while (count) { @@ -3639,10 +3633,8 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, if (!__is_valid_data_blkaddr(blkaddr)) continue; if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr, - DATA_GENERIC_ENHANCE))) { - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - } } while (count) { @@ -3924,8 +3916,6 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) DATA_GENERIC_ENHANCE)) { ret = -EFSCORRUPTED; f2fs_put_dnode(&dn); - f2fs_handle_error(sbi, - ERROR_INVALID_BLKADDR); goto out; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 96ba3585f1cf..ca1bf412b882 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1195,7 +1195,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index) if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ))) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto put_page; } goto got_it; @@ -1214,7 +1213,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index) if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE))) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto put_page; } got_it: diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c381f0af33f3..e7bf15b8240a 100644 --- a/fs/f2fs/recovery.c 
+++ b/fs/f2fs/recovery.c @@ -693,14 +693,12 @@ retry_dn: if (__is_valid_data_blkaddr(src) && !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto err; } if (__is_valid_data_blkaddr(dest) && !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { err = -EFSCORRUPTED; - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto err; } @@ -755,8 +753,6 @@ retry_prev: f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u", dest, inode->i_ino, dn.ofs_in_node); err = -EFSCORRUPTED; - f2fs_handle_error(sbi, - ERROR_INVALID_BLKADDR); goto err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 20af48d7f784..edddbbd69e08 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -334,8 +334,6 @@ static int __f2fs_commit_atomic_write(struct inode *inode) DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); ret = -EFSCORRUPTED; - f2fs_handle_error(sbi, - ERROR_INVALID_BLKADDR); goto out; } From 245930617c9bd85330c78e1a70775e1f61b12f7a Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Mon, 11 Mar 2024 15:48:54 +0800 Subject: [PATCH 56/60] f2fs: fix to handle error paths of {new,change}_curseg() {new,change}_curseg() may return error in some special cases, error handling should be did in their callers, and this will also facilitate subsequent error path expansion in {new,change}_curseg(). Signed-off-by: Zhiguo Niu Signed-off-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/gc.c | 7 ++++-- fs/f2fs/segment.c | 57 +++++++++++++++++++++++++++++------------------ fs/f2fs/super.c | 4 +++- 4 files changed, 45 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9814e5981a6a..9cda9fcfe351 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3700,10 +3700,10 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno); -void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi); +int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi); void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi); void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi); -void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, +int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ca1bf412b882..8852814dab7f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -2035,8 +2035,11 @@ static int free_segment_range(struct f2fs_sb_info *sbi, mutex_unlock(&DIRTY_I(sbi)->seglist_lock); /* Move out cursegs from the target range */ - for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++) - f2fs_allocate_segment_for_resize(sbi, type, start, end); + for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++) { + err = f2fs_allocate_segment_for_resize(sbi, type, start, end); + if (err) + goto out; + } /* do GC to move out valid blocks in the range */ err = f2fs_gc_range(sbi, start, end, dry_run, 0); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index edddbbd69e08..c9b0ef7302be 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2863,7 +2863,7 @@ bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) * This function 
always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ -static void change_curseg(struct f2fs_sb_info *sbi, int type) +static int change_curseg(struct f2fs_sb_info *sbi, int type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2888,21 +2888,23 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type) if (IS_ERR(sum_page)) { /* GC won't be able to use stale summary pages by cp_error */ memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE); - return; + return PTR_ERR(sum_page); } sum_node = (struct f2fs_summary_block *)page_address(sum_page); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); f2fs_put_page(sum_page, 1); + return 0; } static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, int alloc_mode, unsigned long long age); -static void get_atssr_segment(struct f2fs_sb_info *sbi, int type, +static int get_atssr_segment(struct f2fs_sb_info *sbi, int type, int target_type, int alloc_mode, unsigned long long age) { struct curseg_info *curseg = CURSEG_I(sbi, type); + int ret = 0; curseg->seg_type = target_type; @@ -2910,38 +2912,41 @@ static void get_atssr_segment(struct f2fs_sb_info *sbi, int type, struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno); curseg->seg_type = se->type; - change_curseg(sbi, type); + ret = change_curseg(sbi, type); } else { /* allocate cold segment by default */ curseg->seg_type = CURSEG_COLD_DATA; - new_curseg(sbi, type, true); + ret = new_curseg(sbi, type, true); } stat_inc_seg_type(sbi, curseg); + return ret; } -static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) +static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC); + int ret = 0; if (!sbi->am.atgc_enabled) - return; + return 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&SIT_I(sbi)->sentry_lock); - get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0); + ret = get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, + CURSEG_COLD_DATA, SSR, 0); up_write(&SIT_I(sbi)->sentry_lock); mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); - + return ret; } -void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) +int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) { - __f2fs_init_atgc_curseg(sbi); + return __f2fs_init_atgc_curseg(sbi); } static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type) @@ -3069,11 +3074,12 @@ static bool need_new_seg(struct f2fs_sb_info *sbi, int type) return false; } -void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, +int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int segno; + int ret = 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); @@ -3084,9 +3090,9 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, goto unlock; if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0)) - change_curseg(sbi, type); + ret = change_curseg(sbi, type); else - new_curseg(sbi, type, true); + ret = new_curseg(sbi, type, true); stat_inc_seg_type(sbi, curseg); @@ -3100,6 +3106,7 @@ unlock: mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); + return ret; } static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, @@ -3486,14 +3493,17 @@ int 
f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, bool from_gc = (type == CURSEG_ALL_DATA_ATGC); struct seg_entry *se = NULL; bool segment_full = false; + int ret = 0; f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); - if (curseg->segno == NULL_SEGNO) + if (curseg->segno == NULL_SEGNO) { + ret = -ENOSPC; goto out_err; + } if (from_gc) { f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO); @@ -3546,17 +3556,17 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, } if (from_gc) { - get_atssr_segment(sbi, type, se->type, + ret = get_atssr_segment(sbi, type, se->type, AT_SSR, se->mtime); } else { if (need_new_seg(sbi, type)) - new_curseg(sbi, type, false); + ret = new_curseg(sbi, type, false); else - change_curseg(sbi, type); + ret = change_curseg(sbi, type); stat_inc_seg_type(sbi, curseg); } - if (curseg->segno == NULL_SEGNO) + if (ret) goto out_err; } @@ -3599,7 +3609,7 @@ out_err: up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); - return -ENOSPC; + return ret; } @@ -3828,7 +3838,8 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* change the current segment */ if (segno != curseg->segno) { curseg->next_segno = segno; - change_curseg(sbi, type); + if (change_curseg(sbi, type)) + goto out_unlock; } curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); @@ -3854,12 +3865,14 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (recover_curseg) { if (old_cursegno != curseg->segno) { curseg->next_segno = old_cursegno; - change_curseg(sbi, type); + if (change_curseg(sbi, type)) + goto out_unlock; } curseg->next_blkoff = old_blkoff; curseg->alloc_type = old_alloc_type; } +out_unlock: up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); f2fs_up_write(&SM_I(sbi)->curseg_lock); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c4c4c957a3c9..03f56a7c80fa 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4680,7 +4680,9 @@ reset_checkpoint: if (err) goto free_meta; - f2fs_init_inmem_curseg(sbi); + err = f2fs_init_inmem_curseg(sbi); + if (err) + goto sync_free_meta; /* f2fs_recover_fsync_data() cleared this already */ clear_sbi_flag(sbi, SBI_POR_DOING); From c644af1332833a5baa2b6a9220d1a1d988352b26 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 11 Mar 2024 16:59:19 -0700 Subject: [PATCH 57/60] f2fs: prevent atomic write on pinned file Since atomic write way was changed to out-place-update, we should prevent it on pinned files. Signed-off-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4dfe38e73130..8a2a44ab3d32 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2083,7 +2083,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) inode_lock(inode); - if (!f2fs_disable_compressed_file(inode)) { + if (!f2fs_disable_compressed_file(inode) || + f2fs_is_pinned_file(inode)) { ret = -EINVAL; goto out; } From 4bf78322346f6320313683dc9464e5423423ad5c Mon Sep 17 00:00:00 2001 From: Sunmin Jeong Date: Wed, 13 Mar 2024 20:26:19 +0900 Subject: [PATCH 58/60] f2fs: mark inode dirty for FI_ATOMIC_COMMITTED flag In f2fs_update_inode, i_size of the atomic file isn't updated until FI_ATOMIC_COMMITTED flag is set. 
When committing atomic write right after the writeback of the inode, i_size of the raw inode will not be updated. It can cause the atomicity corruption due to a mismatch between old file size and new data. To prevent the problem, let's mark inode dirty for FI_ATOMIC_COMMITTED Atomic write thread Writeback thread __writeback_single_inode write_inode f2fs_update_inode - skip i_size update f2fs_ioc_commit_atomic_write f2fs_commit_atomic_write set_inode_flag(inode, FI_ATOMIC_COMMITTED) f2fs_do_sync_file f2fs_fsync_node_pages - skip f2fs_update_inode since the inode is clean Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Cc: stable@vger.kernel.org #v5.19+ Reviewed-by: Sungjong Seo Reviewed-by: Yeongjin Gil Signed-off-by: Sunmin Jeong Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9cda9fcfe351..4af3dfbd63c7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3037,6 +3037,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_INLINE_DOTS: case FI_PIN_FILE: case FI_COMPRESS_RELEASED: + case FI_ATOMIC_COMMITTED: f2fs_mark_inode_dirty_sync(inode, true); } } From 74b0ebcbdde4c7fe23c979e4cfc2fdbf349c39a3 Mon Sep 17 00:00:00 2001 From: Sunmin Jeong Date: Wed, 13 Mar 2024 20:26:20 +0900 Subject: [PATCH 59/60] f2fs: truncate page cache before clearing flags when aborting atomic write In f2fs_do_write_data_page, FI_ATOMIC_FILE flag selects the target inode between the original inode and COW inode. When aborting atomic write and writeback occur simultaneously, invalid data can be written to original inode if the FI_ATOMIC_FILE flag is cleared meanwhile. To prevent the problem, let's truncate all pages before clearing the flag Atomic write thread Writeback thread f2fs_abort_atomic_write clear_inode_flag(inode, FI_ATOMIC_FILE) __writeback_single_inode do_writepages f2fs_do_write_data_page - use dn of original inode truncate_inode_pages_final Fixes: 3db1de0e582c ("f2fs: change the current atomic write way") Cc: stable@vger.kernel.org #v5.19+ Reviewed-by: Sungjong Seo Reviewed-by: Yeongjin Gil Signed-off-by: Sunmin Jeong Reviewed-by: Daeho Jeong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c9b0ef7302be..e9e8b9bb3ce9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -192,6 +192,9 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) if (!f2fs_is_atomic_file(inode)) return; + if (clean) + truncate_inode_pages_final(inode->i_mapping); + release_atomic_write_cnt(inode); clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_REPLACE); @@ -201,7 +204,6 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) F2FS_I(inode)->atomic_write_task = NULL; if (clean) { - truncate_inode_pages_final(inode->i_mapping); f2fs_i_size_write(inode, fi->original_i_size); fi->original_i_size = 0; } From eb70d5a6c932d9d23f4bb3e7b83782c21ac4b064 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 14 Mar 2024 10:05:28 +0800 Subject: [PATCH 60/60] f2fs: fix to avoid use-after-free issue in f2fs_filemap_fault syzbot reports a f2fs bug as below: BUG: KASAN: slab-use-after-free in f2fs_filemap_fault+0xd1/0x2c0 fs/f2fs/file.c:49 Read of size 8 at addr ffff88807bb22680 by task syz-executor184/5058 CPU: 0 PID: 5058 Comm: syz-executor184 Not tainted 6.7.0-syzkaller-09928-g052d534373b7 #0 
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0x163/0x540 mm/kasan/report.c:488 kasan_report+0x142/0x170 mm/kasan/report.c:601 f2fs_filemap_fault+0xd1/0x2c0 fs/f2fs/file.c:49 __do_fault+0x131/0x450 mm/memory.c:4376 do_shared_fault mm/memory.c:4798 [inline] do_fault mm/memory.c:4872 [inline] do_pte_missing mm/memory.c:3745 [inline] handle_pte_fault mm/memory.c:5144 [inline] __handle_mm_fault+0x23b7/0x72b0 mm/memory.c:5285 handle_mm_fault+0x27e/0x770 mm/memory.c:5450 do_user_addr_fault arch/x86/mm/fault.c:1364 [inline] handle_page_fault arch/x86/mm/fault.c:1507 [inline] exc_page_fault+0x456/0x870 arch/x86/mm/fault.c:1563 asm_exc_page_fault+0x26/0x30 arch/x86/include/asm/idtentry.h:570 The root cause is: in f2fs_filemap_fault(), vmf->vma may be not alive after filemap_fault(), so it may cause use-after-free issue when accessing vmf->vma->vm_flags in trace_f2fs_filemap_fault(). So it needs to keep vm_flags in separated temporary variable for tracepoint use. Fixes: 87f3afd366f7 ("f2fs: add tracepoint for f2fs_vm_page_mkwrite()") Reported-and-tested-by: syzbot+763afad57075d3f862f2@syzkaller.appspotmail.com Closes: https://lore.kernel.org/lkml/000000000000e8222b060f00db3b@google.com Cc: Ed Tsai Suggested-by: Hillf Danton Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8a2a44ab3d32..1761ad125f97 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -39,6 +39,7 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); + vm_flags_t flags = vmf->vma->vm_flags; vm_fault_t ret; ret = filemap_fault(vmf); @@ -46,7 +47,7 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) f2fs_update_iostat(F2FS_I_SB(inode), inode, APP_MAPPED_READ_IO, F2FS_BLKSIZE); - trace_f2fs_filemap_fault(inode, vmf->pgoff, vmf->vma->vm_flags, ret); + trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret); return ret; }
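The fix follows a general defensive pattern: snapshot any field you still need from an object whose lifetime you do not control before calling into code that may invalidate it, then use only the local copy. A user-space sketch with stand-in types, not the mm structures:

#include <stdio.h>
#include <stdlib.h>

struct vma_model { unsigned long vm_flags; };

/* models filemap_fault() dropping the VMA under us */
static void call_that_may_free(struct vma_model **vma)
{
        free(*vma);
        *vma = NULL;
}

int main(void)
{
        struct vma_model *vma = malloc(sizeof(*vma));
        unsigned long flags;

        if (!vma)
                return 1;
        vma->vm_flags = 0x251;          /* arbitrary example value */

        flags = vma->vm_flags;          /* take the copy before the risky call */
        call_that_may_free(&vma);

        printf("traced vm_flags: 0x%lx\n", flags);      /* safe: local copy */
        return 0;
}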