diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 1a4d83953379..cad6c3dc1f9c 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -331,7 +331,7 @@ Date: January 2018 Contact: Jaegeuk Kim Description: This indicates how many GC can be failed for the pinned file. If it exceeds this, F2FS doesn't guarantee its pinning - state. 2048 trials is set by default. + state. 2048 trials is set by default, and 65535 as maximum. What: /sys/fs/f2fs//extension_list Date: February 2018 diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index efc3493fd6f8..68a0885fb5e6 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -774,6 +774,35 @@ In order to identify whether the data in the victim segment are valid or not, F2FS manages a bitmap. Each bit represents the validity of a block, and the bitmap is composed of a bit stream covering whole blocks in main area. +Write-hint Policy +----------------- + +F2FS sets the whint all the time with the below policy. + +===================== ======================== =================== +User F2FS Block +===================== ======================== =================== +N/A META WRITE_LIFE_NONE|REQ_META +N/A HOT_NODE WRITE_LIFE_NONE +N/A WARM_NODE WRITE_LIFE_MEDIUM +N/A COLD_NODE WRITE_LIFE_LONG +ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME +extension list " " + +-- buffered io +N/A COLD_DATA WRITE_LIFE_EXTREME +N/A HOT_DATA WRITE_LIFE_SHORT +N/A WARM_DATA WRITE_LIFE_NOT_SET + +-- direct io +WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME +WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT +WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET +WRITE_LIFE_NONE " WRITE_LIFE_NONE +WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM +WRITE_LIFE_LONG " WRITE_LIFE_LONG +===================== ======================== =================== + Fallocate(2) Policy ------------------- diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index eac698b8dd38..55d444bec5c0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -179,22 +179,22 @@ static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, break; case META_SIT: if (unlikely(blkaddr >= SIT_BLK_CNT(sbi))) - goto err; + goto check_only; break; case META_SSA: if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) || blkaddr < SM_I(sbi)->ssa_blkaddr)) - goto err; + goto check_only; break; case META_CP: if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr || blkaddr < __start_cp_addr(sbi))) - goto err; + goto check_only; break; case META_POR: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) - goto err; + goto check_only; break; case DATA_GENERIC: case DATA_GENERIC_ENHANCE: @@ -228,6 +228,7 @@ static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, return true; err: f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); +check_only: return false; } @@ -345,7 +346,7 @@ static int __f2fs_write_meta_page(struct page *page, { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - trace_f2fs_writepage(page, META); + trace_f2fs_writepage(page_folio(page), META); if (unlikely(f2fs_cp_error(sbi))) { if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) { @@ -492,7 +493,7 @@ long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, static bool f2fs_dirty_meta_folio(struct address_space *mapping, struct folio *folio) { - trace_f2fs_set_page_dirty(&folio->page, META); + trace_f2fs_set_page_dirty(folio, META); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 8892c8262141..1ef82a546391 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -198,8 +198,8 @@ static int lzo_compress_pages(struct compress_ctx *cc) ret = lzo1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata, &cc->clen, cc->private); if (ret != LZO_E_OK) { - printk_ratelimited("%sF2FS-fs (%s): lzo compress failed, ret:%d\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret); + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "lzo compress failed, ret:%d", ret); return -EIO; } return 0; @@ -212,17 +212,15 @@ static int lzo_decompress_pages(struct decompress_io_ctx *dic) ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen, dic->rbuf, &dic->rlen); if (ret != LZO_E_OK) { - printk_ratelimited("%sF2FS-fs (%s): lzo decompress failed, ret:%d\n", - KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret); + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "lzo decompress failed, ret:%d", ret); return -EIO; } if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) { - printk_ratelimited("%sF2FS-fs (%s): lzo invalid rlen:%zu, " - "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, - dic->rlen, - PAGE_SIZE << dic->log_cluster_size); + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "lzo invalid rlen:%zu, expected:%lu", + dic->rlen, PAGE_SIZE << dic->log_cluster_size); return -EIO; } return 0; @@ -294,16 +292,15 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf, dic->clen, dic->rlen); if (ret < 0) { - printk_ratelimited("%sF2FS-fs (%s): lz4 decompress failed, ret:%d\n", - KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret); + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "lz4 decompress failed, ret:%d", ret); return -EIO; } if (ret != PAGE_SIZE << dic->log_cluster_size) { - printk_ratelimited("%sF2FS-fs (%s): lz4 invalid ret:%d, " - "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, ret, - PAGE_SIZE << dic->log_cluster_size); + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "lz4 invalid ret:%d, expected:%lu", + ret, PAGE_SIZE << dic->log_cluster_size); return -EIO; } return 0; @@ -350,9 +347,8 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc) stream = zstd_init_cstream(¶ms, 0, workspace, workspace_size); if (!stream) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_cstream failed\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, - __func__); + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "%s zstd_init_cstream failed", __func__); kvfree(workspace); return -EIO; } @@ -390,16 +386,16 @@ static int zstd_compress_pages(struct compress_ctx *cc) ret = zstd_compress_stream(stream, &outbuf, &inbuf); if (zstd_is_error(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_compress_stream failed, ret: %d\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "%s zstd_compress_stream failed, ret: %d", __func__, zstd_get_error_code(ret)); return -EIO; } ret = zstd_end_stream(stream, &outbuf); if (zstd_is_error(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_end_stream returned %d\n", - KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, + f2fs_err_ratelimited(F2FS_I_SB(cc->inode), + "%s zstd_end_stream returned %d", __func__, zstd_get_error_code(ret)); return -EIO; } @@ -432,9 +428,8 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) stream = zstd_init_dstream(max_window_size, workspace, workspace_size); if (!stream) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_dstream failed\n", - KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, - __func__); + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "%s zstd_init_dstream failed", __func__); kvfree(workspace); return -EIO; } @@ -469,16 +464,15 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) ret = zstd_decompress_stream(stream, &outbuf, &inbuf); if (zstd_is_error(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s zstd_decompress_stream failed, ret: %d\n", - KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "%s zstd_decompress_stream failed, ret: %d", __func__, zstd_get_error_code(ret)); return -EIO; } if (dic->rlen != outbuf.pos) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD invalid rlen:%zu, " - "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, + f2fs_err_ratelimited(F2FS_I_SB(dic->inode), + "%s ZSTD invalid rlen:%zu, expected:%lu", __func__, dic->rlen, PAGE_SIZE << dic->log_cluster_size); return -EIO; @@ -1031,6 +1025,31 @@ static void set_cluster_writeback(struct compress_ctx *cc) } } +static void cancel_cluster_writeback(struct compress_ctx *cc, + struct compress_io_ctx *cic, int submitted) +{ + int i; + + /* Wait for submitted IOs. */ + if (submitted > 1) { + f2fs_submit_merged_write(F2FS_I_SB(cc->inode), DATA); + while (atomic_read(&cic->pending_pages) != + (cc->valid_nr_cpages - submitted + 1)) + f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT); + } + + /* Cancel writeback and stay locked. */ + for (i = 0; i < cc->cluster_size; i++) { + if (i < submitted) { + inode_inc_dirty_pages(cc->inode); + lock_page(cc->rpages[i]); + } + clear_page_private_gcing(cc->rpages[i]); + if (folio_test_writeback(page_folio(cc->rpages[i]))) + end_page_writeback(cc->rpages[i]); + } +} + static void set_cluster_dirty(struct compress_ctx *cc) { int i; @@ -1232,7 +1251,6 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .page = NULL, .encrypted_page = NULL, .compressed_page = NULL, - .submitted = 0, .io_type = io_type, .io_wbc = wbc, .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode) ? @@ -1358,7 +1376,16 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, fio.compressed_page = cc->cpages[i - 1]; cc->cpages[i - 1] = NULL; + fio.submitted = 0; f2fs_outplace_write_data(&dn, &fio); + if (unlikely(!fio.submitted)) { + cancel_cluster_writeback(cc, cic, i); + + /* To call fscrypt_finalize_bounce_page */ + i = cc->valid_nr_cpages; + *submitted = 0; + goto out_destroy_crypt; + } (*submitted)++; unlock_continue: inode_dec_dirty_pages(cc->inode); @@ -1392,8 +1419,11 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, out_destroy_crypt: page_array_free(cc->inode, cic->rpages, cc->cluster_size); - for (--i; i >= 0; i--) + for (--i; i >= 0; i--) { + if (!cc->cpages[i]) + continue; fscrypt_finalize_bounce_page(&cc->cpages[i]); + } out_put_cic: kmem_cache_free(cic_entry_slab, cic); out_put_dnode: @@ -1484,7 +1514,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, if (!PageDirty(cc->rpages[i])) goto continue_unlock; - if (PageWriteback(cc->rpages[i])) { + if (folio_test_writeback(page_folio(cc->rpages[i]))) { if (wbc->sync_mode == WB_SYNC_NONE) goto continue_unlock; f2fs_wait_on_page_writeback(cc->rpages[i], DATA, true, true); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 961e6ff77c72..b9b0debc6b3d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -465,6 +465,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) } else { bio->bi_end_io = f2fs_write_end_io; bio->bi_private = sbi; + bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, + fio->type, fio->temp); } iostat_alloc_and_bind_ctx(sbi, bio, NULL); @@ -593,17 +595,20 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi) return -ENOMEM; for (j = HOT; j < n; j++) { - init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem); - sbi->write_io[i][j].sbi = sbi; - sbi->write_io[i][j].bio = NULL; - spin_lock_init(&sbi->write_io[i][j].io_lock); - INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); - INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); - init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock); + struct f2fs_bio_info *io = &sbi->write_io[i][j]; + + init_f2fs_rwsem(&io->io_rwsem); + io->sbi = sbi; + io->bio = NULL; + io->last_block_in_bio = 0; + spin_lock_init(&io->io_lock); + INIT_LIST_HEAD(&io->io_list); + INIT_LIST_HEAD(&io->bio_list); + init_f2fs_rwsem(&io->bio_list_lock); #ifdef CONFIG_BLK_DEV_ZONED - init_completion(&sbi->write_io[i][j].zone_wait); - sbi->write_io[i][j].zone_pending_bio = NULL; - sbi->write_io[i][j].bi_private = NULL; + init_completion(&io->zone_wait); + io->zone_pending_bio = NULL; + io->bi_private = NULL; #endif } } @@ -1507,6 +1512,25 @@ static bool f2fs_map_blocks_cached(struct inode *inode, return true; } +static bool map_is_mergeable(struct f2fs_sb_info *sbi, + struct f2fs_map_blocks *map, + block_t blkaddr, int flag, int bidx, + int ofs) +{ + if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev) + return false; + if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs)) + return true; + if (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) + return true; + if (flag == F2FS_GET_BLOCK_PRE_DIO) + return true; + if (flag == F2FS_GET_BLOCK_DIO && + map->m_pblk == NULL_ADDR && blkaddr == NULL_ADDR) + return true; + return false; +} + /* * f2fs_map_blocks() tries to find or build mapping relationship which * maps continuous logical blocks to physical blocks, and return such @@ -1574,8 +1598,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) } /* use out-place-update for direct IO under LFS mode */ - if (map->m_may_create && - (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) { + if (map->m_may_create && (is_hole || + (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + !f2fs_is_pinned_file(inode)))) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto sync_out; @@ -1628,6 +1653,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) goto sync_out; } break; + case F2FS_GET_BLOCK_DIO: + if (map->m_next_pgofs) + *map->m_next_pgofs = pgofs + 1; + break; default: /* for defragment case */ if (map->m_next_pgofs) @@ -1646,19 +1675,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) /* reserved delalloc block should be mapped for fiemap. */ if (blkaddr == NEW_ADDR) map->m_flags |= F2FS_MAP_DELALLOC; - map->m_flags |= F2FS_MAP_MAPPED; + if (flag != F2FS_GET_BLOCK_DIO || !is_hole) + map->m_flags |= F2FS_MAP_MAPPED; map->m_pblk = blkaddr; map->m_len = 1; if (map->m_multidev_dio) map->m_bdev = FDEV(bidx).bdev; - } else if ((map->m_pblk != NEW_ADDR && - blkaddr == (map->m_pblk + ofs)) || - (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || - flag == F2FS_GET_BLOCK_PRE_DIO) { - if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev) - goto sync_out; + } else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) { ofs++; map->m_len++; } else { @@ -2042,7 +2067,7 @@ static inline loff_t f2fs_readpage_limit(struct inode *inode) return i_size_read(inode); } -static int f2fs_read_single_page(struct inode *inode, struct page *page, +static int f2fs_read_single_page(struct inode *inode, struct folio *folio, unsigned nr_pages, struct f2fs_map_blocks *map, struct bio **bio_ret, @@ -2055,9 +2080,10 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, sector_t last_block; sector_t last_block_in_file; sector_t block_nr; + pgoff_t index = folio_index(folio); int ret = 0; - block_in_file = (sector_t)page_index(page); + block_in_file = (sector_t)index; last_block = block_in_file + nr_pages; last_block_in_file = bytes_to_blks(inode, f2fs_readpage_limit(inode) + blocksize - 1); @@ -2088,7 +2114,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, got_it: if ((map->m_flags & F2FS_MAP_MAPPED)) { block_nr = map->m_pblk + block_in_file - map->m_lblk; - SetPageMappedToDisk(page); + folio_set_mappedtodisk(folio); if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, DATA_GENERIC_ENHANCE_READ)) { @@ -2097,15 +2123,15 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, } } else { zero_out: - zero_user_segment(page, 0, PAGE_SIZE); - if (f2fs_need_verity(inode, page->index) && - !fsverity_verify_page(page)) { + folio_zero_segment(folio, 0, folio_size(folio)); + if (f2fs_need_verity(inode, index) && + !fsverity_verify_folio(folio)) { ret = -EIO; goto out; } - if (!PageUptodate(page)) - SetPageUptodate(page); - unlock_page(page); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + folio_unlock(folio); goto out; } @@ -2115,14 +2141,14 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, */ if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio, *last_block_in_bio, block_nr) || - !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { + !f2fs_crypt_mergeable_bio(bio, inode, index, NULL))) { submit_and_realloc: f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } if (bio == NULL) { bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, - is_readahead ? REQ_RAHEAD : 0, page->index, + is_readahead ? REQ_RAHEAD : 0, index, false); if (IS_ERR(bio)) { ret = PTR_ERR(bio); @@ -2137,7 +2163,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, */ f2fs_wait_on_block_writeback(inode, block_nr); - if (bio_add_page(bio, page, blocksize, 0) < blocksize) + if (!bio_add_folio(bio, folio, blocksize, 0)) goto submit_and_realloc; inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); @@ -2324,7 +2350,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, * Major change was from block_size == page_size in f2fs by default. */ static int f2fs_mpage_readpages(struct inode *inode, - struct readahead_control *rac, struct page *page) + struct readahead_control *rac, struct folio *folio) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; @@ -2344,6 +2370,7 @@ static int f2fs_mpage_readpages(struct inode *inode, #endif unsigned nr_pages = rac ? readahead_count(rac) : 1; unsigned max_nr_pages = nr_pages; + pgoff_t index; int ret = 0; map.m_pblk = 0; @@ -2357,64 +2384,63 @@ static int f2fs_mpage_readpages(struct inode *inode, for (; nr_pages; nr_pages--) { if (rac) { - page = readahead_page(rac); - prefetchw(&page->flags); + folio = readahead_folio(rac); + prefetchw(&folio->flags); } + index = folio_index(folio); + #ifdef CONFIG_F2FS_FS_COMPRESSION - if (f2fs_compressed_file(inode)) { - /* there are remained compressed pages, submit them */ - if (!f2fs_cluster_can_merge_page(&cc, page->index)) { - ret = f2fs_read_multi_pages(&cc, &bio, - max_nr_pages, - &last_block_in_bio, - rac != NULL, false); - f2fs_destroy_compress_ctx(&cc, false); - if (ret) - goto set_error_page; - } - if (cc.cluster_idx == NULL_CLUSTER) { - if (nc_cluster_idx == - page->index >> cc.log_cluster_size) { - goto read_single_page; - } + if (!f2fs_compressed_file(inode)) + goto read_single_page; - ret = f2fs_is_compressed_cluster(inode, page->index); - if (ret < 0) - goto set_error_page; - else if (!ret) { - nc_cluster_idx = - page->index >> cc.log_cluster_size; - goto read_single_page; - } - - nc_cluster_idx = NULL_CLUSTER; - } - ret = f2fs_init_compress_ctx(&cc); + /* there are remained compressed pages, submit them */ + if (!f2fs_cluster_can_merge_page(&cc, index)) { + ret = f2fs_read_multi_pages(&cc, &bio, + max_nr_pages, + &last_block_in_bio, + rac != NULL, false); + f2fs_destroy_compress_ctx(&cc, false); if (ret) goto set_error_page; - - f2fs_compress_ctx_add_page(&cc, page); - - goto next_page; } + if (cc.cluster_idx == NULL_CLUSTER) { + if (nc_cluster_idx == index >> cc.log_cluster_size) + goto read_single_page; + + ret = f2fs_is_compressed_cluster(inode, index); + if (ret < 0) + goto set_error_page; + else if (!ret) { + nc_cluster_idx = + index >> cc.log_cluster_size; + goto read_single_page; + } + + nc_cluster_idx = NULL_CLUSTER; + } + ret = f2fs_init_compress_ctx(&cc); + if (ret) + goto set_error_page; + + f2fs_compress_ctx_add_page(&cc, &folio->page); + + goto next_page; read_single_page: #endif - ret = f2fs_read_single_page(inode, page, max_nr_pages, &map, + ret = f2fs_read_single_page(inode, folio, max_nr_pages, &map, &bio, &last_block_in_bio, rac); if (ret) { #ifdef CONFIG_F2FS_FS_COMPRESSION set_error_page: #endif - zero_user_segment(page, 0, PAGE_SIZE); - unlock_page(page); + folio_zero_segment(folio, 0, folio_size(folio)); + folio_unlock(folio); } #ifdef CONFIG_F2FS_FS_COMPRESSION next_page: #endif - if (rac) - put_page(page); #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { @@ -2436,22 +2462,21 @@ static int f2fs_mpage_readpages(struct inode *inode, static int f2fs_read_data_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = folio_file_mapping(folio)->host; int ret = -EAGAIN; - trace_f2fs_readpage(page, DATA); + trace_f2fs_readpage(folio, DATA); if (!f2fs_is_compress_backend_ready(inode)) { - unlock_page(page); + folio_unlock(folio); return -EOPNOTSUPP; } /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) - ret = f2fs_read_inline_data(inode, page); + ret = f2fs_read_inline_data(inode, folio); if (ret == -EAGAIN) - ret = f2fs_mpage_readpages(inode, NULL, page); + ret = f2fs_mpage_readpages(inode, NULL, folio); return ret; } @@ -2685,12 +2710,11 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) if (err) { if (fscrypt_inode_uses_fs_layer_crypto(inode)) fscrypt_finalize_bounce_page(&fio->encrypted_page); - if (PageWriteback(page)) - end_page_writeback(page); + end_page_writeback(page); } else { set_inode_flag(inode, FI_UPDATE_WRITE); } - trace_f2fs_do_write_data_page(fio->page, IPU); + trace_f2fs_do_write_data_page(page_folio(page), IPU); return err; } @@ -2719,7 +2743,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* LFS mode write path */ f2fs_outplace_write_data(&dn, fio); - trace_f2fs_do_write_data_page(page, OPU); + trace_f2fs_do_write_data_page(page_folio(page), OPU); set_inode_flag(inode, FI_APPEND_WRITE); out_writepage: f2fs_put_dnode(&dn); @@ -2766,7 +2790,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .last_block = last_block, }; - trace_f2fs_writepage(page, DATA); + trace_f2fs_writepage(page_folio(page), DATA); /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { @@ -3379,7 +3403,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA(inode)) { - f2fs_do_read_inline_data(page, ipage); + f2fs_do_read_inline_data(page_folio(page), ipage); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) set_page_private_inline(ipage); @@ -3740,7 +3764,7 @@ static bool f2fs_dirty_data_folio(struct address_space *mapping, { struct inode *inode = mapping->host; - trace_f2fs_set_page_dirty(&folio->page, DATA); + trace_f2fs_set_page_dirty(folio, DATA); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); @@ -3896,15 +3920,14 @@ static int check_swap_activate(struct swap_info_struct *sis, struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - sector_t cur_lblock; - sector_t last_lblock; - sector_t pblock; - sector_t lowest_pblock = -1; - sector_t highest_pblock = 0; + block_t cur_lblock; + block_t last_lblock; + block_t pblock; + block_t lowest_pblock = -1; + block_t highest_pblock = 0; int nr_extents = 0; - unsigned long nr_pblocks; + unsigned int nr_pblocks; unsigned int blks_per_sec = BLKS_PER_SEC(sbi); - unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1; unsigned int not_aligned = 0; int ret = 0; @@ -3942,8 +3965,8 @@ static int check_swap_activate(struct swap_info_struct *sis, pblock = map.m_pblk; nr_pblocks = map.m_len; - if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || - nr_pblocks & sec_blks_mask || + if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec || + nr_pblocks % blks_per_sec || !f2fs_valid_pinned_area(sbi, pblock)) { bool last_extent = false; @@ -4160,7 +4183,8 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, map.m_lblk = bytes_to_blks(inode, offset); map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1; map.m_next_pgofs = &next_pgofs; - map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint); + map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), + inode->i_write_hint); if (flags & IOMAP_WRITE) map.m_may_create = true; @@ -4181,12 +4205,13 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, * We should never see delalloc or compressed extents here based on * prior flushing and checks. */ - if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR)) - return -EINVAL; if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR)) return -EINVAL; - if (map.m_pblk != NULL_ADDR) { + if (map.m_flags & F2FS_MAP_MAPPED) { + if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR)) + return -EINVAL; + iomap->length = blks_to_bytes(inode, map.m_len); iomap->type = IOMAP_MAPPED; iomap->flags |= IOMAP_F_MERGED; @@ -4195,9 +4220,17 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, } else { if (flags & IOMAP_WRITE) return -ENOTBLK; - iomap->length = blks_to_bytes(inode, next_pgofs) - - iomap->offset; - iomap->type = IOMAP_HOLE; + + if (map.m_pblk == NULL_ADDR) { + iomap->length = blks_to_bytes(inode, next_pgofs) - + iomap->offset; + iomap->type = IOMAP_HOLE; + } else if (map.m_pblk == NEW_ADDR) { + iomap->length = blks_to_bytes(inode, map.m_len); + iomap->type = IOMAP_UNWRITTEN; + } else { + f2fs_bug_on(F2FS_I_SB(inode), 1); + } iomap->addr = IOMAP_NULL_ADDR; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fced2b7652f4..1974b6aff397 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -72,7 +72,7 @@ enum { struct f2fs_fault_info { atomic_t inject_ops; - unsigned int inject_rate; + int inject_rate; unsigned int inject_type; }; @@ -765,11 +765,6 @@ enum { #define DEF_DIR_LEVEL 0 -enum { - GC_FAILURE_PIN, - MAX_GC_FAILURE -}; - /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ @@ -816,9 +811,10 @@ struct f2fs_inode_info { unsigned long i_flags; /* keep an inode flags for ioctl */ unsigned char i_advise; /* use to give file attribute hints */ unsigned char i_dir_level; /* use for dentry level for large dir */ - unsigned int i_current_depth; /* only for directory depth */ - /* for gc failure statistic */ - unsigned int i_gc_failures[MAX_GC_FAILURE]; + union { + unsigned int i_current_depth; /* only for directory depth */ + unsigned short i_gc_failures; /* for gc failure statistic */ + }; unsigned int i_pino; /* parent inode number */ umode_t i_acl_mode; /* keep file acl mode temporarily */ @@ -1557,6 +1553,7 @@ struct f2fs_sb_info { #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ + unsigned int max_open_zones; /* max open zone resources of the zoned device */ #endif /* for node-related operations */ @@ -1676,7 +1673,7 @@ struct f2fs_sb_info { unsigned long long skipped_gc_rwsem; /* FG_GC only */ /* threshold for gc trials on pinned files */ - u64 gc_pin_file_threshold; + unsigned short gc_pin_file_threshold; struct f2fs_rwsem pin_sem; /* maximum # of trials to find a victim segment for SSR and GC */ @@ -2309,7 +2306,7 @@ static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count, bool partial) { - blkcnt_t diff = 0, release = 0; + long long diff = 0, release = 0; block_t avail_user_block_count; int ret; @@ -2329,26 +2326,27 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, percpu_counter_add(&sbi->alloc_valid_block_count, (*count)); spin_lock(&sbi->stat_lock); - sbi->total_valid_block_count += (block_t)(*count); - avail_user_block_count = get_available_block_count(sbi, inode, true); - if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { + avail_user_block_count = get_available_block_count(sbi, inode, true); + diff = (long long)sbi->total_valid_block_count + *count - + avail_user_block_count; + if (unlikely(diff > 0)) { if (!partial) { spin_unlock(&sbi->stat_lock); + release = *count; goto enospc; } - - diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) diff = *count; *count -= diff; release = diff; - sbi->total_valid_block_count -= diff; if (!*count) { spin_unlock(&sbi->stat_lock); goto enospc; } } + sbi->total_valid_block_count += (block_t)(*count); + spin_unlock(&sbi->stat_lock); if (unlikely(release)) { @@ -3132,7 +3130,7 @@ static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) static inline void f2fs_i_gc_failures_write(struct inode *inode, unsigned int count) { - F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = count; + F2FS_I(inode)->i_gc_failures = count; f2fs_mark_inode_dirty_sync(inode, true); } @@ -3497,6 +3495,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); +int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, + bool readonly); int f2fs_precache_extents(struct inode *inode); int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa); int f2fs_fileattr_set(struct mnt_idmap *idmap, @@ -3719,6 +3719,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, block_t old_addr, block_t new_addr, unsigned char version, bool recover_curseg, bool recover_newaddr); +int f2fs_get_segment_temp(int seg_type); int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, @@ -3741,7 +3742,9 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi); void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi); int __init f2fs_create_segment_manager_caches(void); void f2fs_destroy_segment_manager_caches(void); -int f2fs_rw_hint_to_seg_type(enum rw_hint hint); +int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint); +enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp); unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, unsigned int segno); unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, @@ -4148,10 +4151,10 @@ extern struct kmem_cache *f2fs_inode_entry_slab; bool f2fs_may_inline_data(struct inode *inode); bool f2fs_sanity_check_inline_data(struct inode *inode); bool f2fs_may_inline_dentry(struct inode *inode); -void f2fs_do_read_inline_data(struct page *page, struct page *ipage); +void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage); void f2fs_truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from); -int f2fs_read_inline_data(struct inode *inode, struct page *page); +int f2fs_read_inline_data(struct inode *inode, struct folio *folio); int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); @@ -4596,10 +4599,14 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) } #ifdef CONFIG_F2FS_FAULT_INJECTION -extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, - unsigned int type); +extern int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate, + unsigned long type); #else -#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0) +static inline int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, + unsigned long rate, unsigned long type) +{ + return 0; +} #endif static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) @@ -4657,7 +4664,7 @@ static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi, page = find_get_page(META_MAPPING(sbi), blkaddr + i); if (page) { - if (PageWriteback(page)) + if (folio_test_writeback(page_folio(page))) need_submit = true; f2fs_put_page(page, 0); } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2b65e09822d4..5c0b281a70f3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -58,7 +58,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - bool need_alloc = true; + bool need_alloc = !f2fs_is_pinned_file(inode); int err = 0; vm_fault_t ret; @@ -115,19 +115,18 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) goto out_sem; } + set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_alloc) { /* block allocation */ - set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_get_block_locked(&dn, page->index); - } - -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (!need_alloc) { - set_new_dnode(&dn, inode, NULL, NULL, 0); + } else { err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); f2fs_put_dnode(&dn); + if (f2fs_is_pinned_file(inode) && + !__is_valid_data_blkaddr(dn.data_blkaddr)) + err = -EIO; } -#endif + if (err) { unlock_page(page); goto out_sem; @@ -834,7 +833,8 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw) * for blkzoned device, fallback direct IO to buffered IO, so * all IOs can be serialized by log-structured write. */ - if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE)) + if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) && + !f2fs_is_pinned_file(inode)) return true; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) return true; @@ -952,9 +952,14 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, ATTR_GID | ATTR_TIMES_SET)))) return -EPERM; - if ((attr->ia_valid & ATTR_SIZE) && - !f2fs_is_compress_backend_ready(inode)) - return -EOPNOTSUPP; + if ((attr->ia_valid & ATTR_SIZE)) { + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && + !IS_ALIGNED(attr->ia_size, + F2FS_BLK_TO_BYTES(F2FS_I(inode)->i_cluster_size))) + return -EINVAL; + } err = setattr_prepare(idmap, dentry, attr); if (err) @@ -1325,6 +1330,9 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, f2fs_put_page(psrc, 1); return PTR_ERR(pdst); } + + f2fs_wait_on_page_writeback(pdst, DATA, true, true); + memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE); set_page_dirty(pdst); set_page_private_gcing(pdst); @@ -1817,15 +1825,6 @@ static long f2fs_fallocate(struct file *file, int mode, (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; - /* - * Pinned file should not support partial truncation since the block - * can be used by applications. - */ - if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && - (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | - FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) - return -EOPNOTSUPP; - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)) @@ -1833,6 +1832,17 @@ static long f2fs_fallocate(struct file *file, int mode, inode_lock(inode); + /* + * Pinned file should not support partial truncation since the block + * can be used by applications. + */ + if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) { + ret = -EOPNOTSUPP; + goto out; + } + ret = file_modified(file); if (ret) goto out; @@ -2224,34 +2234,13 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) return ret; } -static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) +int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, + bool readonly) { - struct inode *inode = file_inode(filp); - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct super_block *sb = sbi->sb; - __u32 in; int ret = 0; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (get_user(in, (__u32 __user *)arg)) - return -EFAULT; - - if (in != F2FS_GOING_DOWN_FULLSYNC) { - ret = mnt_want_write_file(filp); - if (ret) { - if (ret == -EROFS) { - ret = 0; - f2fs_stop_checkpoint(sbi, false, - STOP_CP_REASON_SHUTDOWN); - trace_f2fs_shutdown(sbi, in, ret); - } - return ret; - } - } - - switch (in) { + switch (flag) { case F2FS_GOING_DOWN_FULLSYNC: ret = bdev_freeze(sb->s_bdev); if (ret) @@ -2290,6 +2279,9 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) goto out; } + if (readonly) + goto out; + f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); @@ -2298,10 +2290,44 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) f2fs_update_time(sbi, REQ_TIME); out: - if (in != F2FS_GOING_DOWN_FULLSYNC) - mnt_drop_write_file(filp); - trace_f2fs_shutdown(sbi, in, ret); + trace_f2fs_shutdown(sbi, flag, ret); + + return ret; +} + +static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + __u32 in; + int ret; + bool need_drop = false, readonly = false; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__u32 __user *)arg)) + return -EFAULT; + + if (in != F2FS_GOING_DOWN_FULLSYNC) { + ret = mnt_want_write_file(filp); + if (ret) { + if (ret != -EROFS) + return ret; + + /* fallback to nosync shutdown for readonly fs */ + in = F2FS_GOING_DOWN_NOSYNC; + readonly = true; + } else { + need_drop = true; + } + } + + ret = f2fs_do_shutdown(sbi, in, readonly); + + if (need_drop) + mnt_drop_write_file(filp); return ret; } @@ -2354,13 +2380,14 @@ static bool uuid_is_nonzero(__u8 u[16]) static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + int ret; if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode))) return -EOPNOTSUPP; + ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); - - return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); + return ret; } static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) @@ -2607,12 +2634,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, bool fragmented = false; int err; - pg_start = range->start >> PAGE_SHIFT; - pg_end = (range->start + range->len) >> PAGE_SHIFT; - f2fs_balance_fs(sbi, true); inode_lock(inode); + pg_start = range->start >> PAGE_SHIFT; + pg_end = min_t(pgoff_t, + (range->start + range->len) >> PAGE_SHIFT, + DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE)); if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { err = -EINVAL; @@ -2627,8 +2655,9 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, } /* writeback all dirty pages in the range */ - err = filemap_write_and_wait_range(inode->i_mapping, range->start, - range->start + range->len - 1); + err = filemap_write_and_wait_range(inode->i_mapping, + pg_start << PAGE_SHIFT, + (pg_end << PAGE_SHIFT) - 1); if (err) goto out; @@ -2786,7 +2815,8 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) err = f2fs_defragment_range(sbi, filp, &range); mnt_drop_write_file(filp); - f2fs_update_time(sbi, REQ_TIME); + if (range.len) + f2fs_update_time(sbi, REQ_TIME); if (err < 0) return err; @@ -2837,7 +2867,8 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out; } - if (f2fs_compressed_file(src) || f2fs_compressed_file(dst)) { + if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) || + f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) { ret = -EOPNOTSUPP; goto out_unlock; } @@ -3189,18 +3220,17 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - /* Use i_gc_failures for normal file as a risk signal. */ - if (inc) - f2fs_i_gc_failures_write(inode, - fi->i_gc_failures[GC_FAILURE_PIN] + 1); - - if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) { + if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", - __func__, inode->i_ino, - fi->i_gc_failures[GC_FAILURE_PIN]); + __func__, inode->i_ino, fi->i_gc_failures); clear_inode_flag(inode, FI_PIN_FILE); return -EAGAIN; } + + /* Use i_gc_failures for normal file as a risk signal. */ + if (inc) + f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1); + return 0; } @@ -3234,7 +3264,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) goto done; } - if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) { + if (F2FS_HAS_BLOCKS(inode)) { ret = -EFBIG; goto out; } @@ -3261,7 +3291,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) } set_inode_flag(inode, FI_PIN_FILE); - ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; + ret = F2FS_I(inode)->i_gc_failures; done: f2fs_update_time(sbi, REQ_TIME); out: @@ -3276,7 +3306,7 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) __u32 pin = 0; if (is_inode_flag_set(inode, FI_PIN_FILE)) - pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; + pin = F2FS_I(inode)->i_gc_failures; return put_user(pin, (u32 __user *)arg); } @@ -3522,9 +3552,6 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - if (f2fs_readonly(sbi->sb)) return -EROFS; @@ -3543,7 +3570,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -3570,9 +3598,12 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) struct dnode_of_data dn; pgoff_t end_offset, count; + f2fs_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); if (ret) { + f2fs_unlock_op(sbi); if (ret == -ENOENT) { page_idx = f2fs_get_next_page_offset(&dn, page_idx); @@ -3590,6 +3621,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + if (ret < 0) break; @@ -3600,6 +3633,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) filemap_invalidate_unlock(inode->i_mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); out: + if (released_blocks) + f2fs_update_time(sbi, REQ_TIME); inode_unlock(inode); mnt_drop_write_file(filp); @@ -3641,7 +3676,8 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, while (count) { int compr_blocks = 0; - blkcnt_t reserved; + blkcnt_t reserved = 0; + blkcnt_t to_reserved; int ret; for (i = 0; i < cluster_size; i++) { @@ -3661,20 +3697,26 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, * fails in release_compress_blocks(), so NEW_ADDR * is a possible case. */ - if (blkaddr == NEW_ADDR || - __is_valid_data_blkaddr(blkaddr)) { + if (blkaddr == NEW_ADDR) { + reserved++; + continue; + } + if (__is_valid_data_blkaddr(blkaddr)) { compr_blocks++; continue; } } - reserved = cluster_size - compr_blocks; + to_reserved = cluster_size - compr_blocks - reserved; /* for the case all blocks in cluster were reserved */ - if (reserved == 1) + if (to_reserved == 1) { + dn->ofs_in_node += cluster_size; goto next; + } - ret = inc_valid_block_count(sbi, dn->inode, &reserved, false); + ret = inc_valid_block_count(sbi, dn->inode, + &to_reserved, false); if (unlikely(ret)) return ret; @@ -3685,7 +3727,7 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); - *reserved_blocks += reserved; + *reserved_blocks += to_reserved; next: count -= cluster_size; } @@ -3704,9 +3746,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - if (f2fs_readonly(sbi->sb)) return -EROFS; @@ -3718,7 +3757,8 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) inode_lock(inode); - if (!is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto unlock_inode; } @@ -3735,9 +3775,12 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) struct dnode_of_data dn; pgoff_t end_offset, count; + f2fs_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); if (ret) { + f2fs_unlock_op(sbi); if (ret == -ENOENT) { page_idx = f2fs_get_next_page_offset(&dn, page_idx); @@ -3755,6 +3798,8 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + if (ret < 0) break; @@ -3770,6 +3815,8 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) f2fs_mark_inode_dirty_sync(inode, true); } unlock_inode: + if (reserved_blocks) + f2fs_update_time(sbi, REQ_TIME); inode_unlock(inode); mnt_drop_write_file(filp); @@ -3778,7 +3825,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) } else if (reserved_blocks && atomic_read(&F2FS_I(inode)->i_compr_blocks)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx " + f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx " "iblocks=%llu, reserved=%u, compr_blocks=%u, " "run fsck to fix.", __func__, inode->i_ino, inode->i_blocks, @@ -3966,6 +4013,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) if (len) ret = f2fs_secure_erase(prev_bdev, inode, prev_index, prev_block, len, range.flags); + f2fs_update_time(sbi, REQ_TIME); out: filemap_invalidate_unlock(mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -4119,9 +4167,6 @@ static int f2fs_ioc_decompress_file(struct file *filp) if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - f2fs_balance_fs(sbi, true); file_start_write(filp); @@ -4132,7 +4177,8 @@ static int f2fs_ioc_decompress_file(struct file *filp) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -4175,6 +4221,7 @@ static int f2fs_ioc_decompress_file(struct file *filp) if (ret) f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.", __func__, ret); + f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); file_end_write(filp); @@ -4197,9 +4244,6 @@ static int f2fs_ioc_compress_file(struct file *filp) if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - f2fs_balance_fs(sbi, true); file_start_write(filp); @@ -4210,7 +4254,8 @@ static int f2fs_ioc_compress_file(struct file *filp) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -4254,6 +4299,7 @@ static int f2fs_ioc_compress_file(struct file *filp) if (ret) f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.", __func__, ret); + f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); file_end_write(filp); @@ -4612,7 +4658,8 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, map.m_may_create = true; if (dio) { - map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint); + map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, + inode->i_write_hint); flag = F2FS_GET_BLOCK_PRE_DIO; } else { map.m_seg_type = NO_CHECK_TYPE; @@ -4660,8 +4707,21 @@ static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, return 0; } +static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, + struct bio *bio, loff_t file_offset) +{ + struct inode *inode = iter->inode; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int seg_type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); + enum temp_type temp = f2fs_get_segment_temp(seg_type); + + bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); + submit_bio(bio); +} + static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = { - .end_io = f2fs_dio_write_end_io, + .end_io = f2fs_dio_write_end_io, + .submit_io = f2fs_dio_write_submit_io, }; static void f2fs_flush_buffered_write(struct address_space *mapping, @@ -4798,6 +4858,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) bool dio; bool may_need_sync = true; int preallocated; + const loff_t pos = iocb->ki_pos; + const ssize_t count = iov_iter_count(from); ssize_t ret; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { @@ -4819,6 +4881,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); } + if (f2fs_is_pinned_file(inode) && + !f2fs_overwrite_io(inode, pos, count)) { + ret = -EIO; + goto out_unlock; + } + ret = f2fs_write_checks(iocb, from); if (ret <= 0) goto out_unlock; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 8852814dab7f..6066c6eecf41 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1434,7 +1434,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, goto out; if (gc_type == BG_GC) { - if (PageWriteback(page)) { + if (folio_test_writeback(page_folio(page))) { err = -EAGAIN; goto out; } @@ -1554,10 +1554,15 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, int err; inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode) || is_bad_inode(inode) || - special_file(inode->i_mode)) + if (IS_ERR(inode)) continue; + if (is_bad_inode(inode) || + special_file(inode->i_mode)) { + iput(inode); + continue; + } + err = f2fs_gc_pinned_control(inode, gc_type, segno); if (err == -EAGAIN) { iput(inode); diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 9c0d06c4d19a..a8ea3301b815 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -26,6 +26,7 @@ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ #define DEF_GC_FAILED_PINNED_FILES 2048 +#define MAX_GC_FAILED_PINNED_FILES USHRT_MAX /* Search max. number of dirty segments to select a victim segment */ #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index ac00423f117b..7638d0d7b7ee 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -61,22 +61,22 @@ bool f2fs_may_inline_dentry(struct inode *inode) return true; } -void f2fs_do_read_inline_data(struct page *page, struct page *ipage) +void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio_file_mapping(folio)->host; - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) return; - f2fs_bug_on(F2FS_P_SB(page), page->index); + f2fs_bug_on(F2FS_I_SB(inode), folio_index(folio)); - zero_user_segment(page, MAX_INLINE_DATA(inode), PAGE_SIZE); + folio_zero_segment(folio, MAX_INLINE_DATA(inode), folio_size(folio)); /* Copy the whole inline data block */ - memcpy_to_page(page, 0, inline_data_addr(inode, ipage), + memcpy_to_folio(folio, 0, inline_data_addr(inode, ipage), MAX_INLINE_DATA(inode)); - if (!PageUptodate(page)) - SetPageUptodate(page); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); } void f2fs_truncate_inline_inode(struct inode *inode, @@ -97,13 +97,13 @@ void f2fs_truncate_inline_inode(struct inode *inode, clear_inode_flag(inode, FI_DATA_EXIST); } -int f2fs_read_inline_data(struct inode *inode, struct page *page) +int f2fs_read_inline_data(struct inode *inode, struct folio *folio) { struct page *ipage; ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) { - unlock_page(page); + folio_unlock(folio); return PTR_ERR(ipage); } @@ -112,15 +112,15 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) return -EAGAIN; } - if (page->index) - zero_user_segment(page, 0, PAGE_SIZE); + if (folio_index(folio)) + folio_zero_segment(folio, 0, folio_size(folio)); else - f2fs_do_read_inline_data(page, ipage); + f2fs_do_read_inline_data(folio, ipage); - if (!PageUptodate(page)) - SetPageUptodate(page); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); f2fs_put_page(ipage, 1); - unlock_page(page); + folio_unlock(folio); return 0; } @@ -164,9 +164,9 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) return -EFSCORRUPTED; } - f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page)); + f2fs_bug_on(F2FS_P_SB(page), folio_test_writeback(page_folio(page))); - f2fs_do_read_inline_data(page, dn->inode_page); + f2fs_do_read_inline_data(page_folio(page), dn->inode_page); set_page_dirty(page); /* clear dirty state */ diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index c26effdce9aa..005dde72aff3 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -161,7 +161,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) if (!f2fs_enable_inode_chksum(sbi, page)) #else if (!f2fs_enable_inode_chksum(sbi, page) || - PageDirty(page) || PageWriteback(page)) + PageDirty(page) || + folio_test_writeback(page_folio(page))) #endif return true; @@ -361,6 +362,12 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (fi->i_xattr_nid && f2fs_check_nid_range(sbi, fi->i_xattr_nid)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_xattr_nid: %u, run fsck to fix.", + __func__, inode->i_ino, fi->i_xattr_nid); + return false; + } + return true; } @@ -408,8 +415,7 @@ static int do_read_inode(struct inode *inode) if (S_ISDIR(inode->i_mode)) fi->i_current_depth = le32_to_cpu(ri->i_current_depth); else if (S_ISREG(inode->i_mode)) - fi->i_gc_failures[GC_FAILURE_PIN] = - le16_to_cpu(ri->i_gc_failures); + fi->i_gc_failures = le16_to_cpu(ri->i_gc_failures); fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); fi->i_flags = le32_to_cpu(ri->i_flags); if (S_ISREG(inode->i_mode)) @@ -679,8 +685,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_current_depth = cpu_to_le32(F2FS_I(inode)->i_current_depth); else if (S_ISREG(inode->i_mode)) - ri->i_gc_failures = - cpu_to_le16(F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]); + ri->i_gc_failures = cpu_to_le16(F2FS_I(inode)->i_gc_failures); ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid); ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); @@ -804,6 +809,7 @@ void f2fs_evict_inode(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); nid_t xnid = fi->i_xattr_nid; int err = 0; + bool freeze_protected = false; f2fs_abort_atomic_write(inode, true); @@ -843,8 +849,10 @@ void f2fs_evict_inode(struct inode *inode) f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); - if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) { sb_start_intwrite(inode->i_sb); + freeze_protected = true; + } set_inode_flag(inode, FI_NO_ALLOC); i_size_write(inode, 0); retry: @@ -887,7 +895,7 @@ void f2fs_evict_inode(struct inode *inode) if (dquot_initialize_needed(inode)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); } - if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + if (freeze_protected) sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b3de6d6cdb02..b72ef96f7e33 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1187,7 +1187,17 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) default: BUG(); } - if (err < 0 && err != -ENOENT) + if (err == -ENOENT) { + set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + f2fs_err_ratelimited(sbi, + "truncate node fail, ino:%lu, nid:%u, " + "offset[0]:%d, offset[1]:%d, nofs:%d", + inode->i_ino, dn.nid, offset[0], + offset[1], nofs); + err = 0; + } + if (err < 0) goto fail; if (offset[1] == 0 && ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { @@ -1319,6 +1329,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) } if (unlikely(new_ni.blk_addr != NULL_ADDR)) { err = -EFSCORRUPTED; + dec_valid_node_count(sbi, dn->inode, !ofs); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto fail; @@ -1345,7 +1356,6 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) if (ofs == 0) inc_valid_inode_count(sbi); return page; - fail: clear_node_page_dirty(page); f2fs_put_page(page, 1); @@ -1614,7 +1624,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, }; unsigned int seq; - trace_f2fs_writepage(page, NODE); + trace_f2fs_writepage(page_folio(page), NODE); if (unlikely(f2fs_cp_error(sbi))) { /* keep node pages in remount-ro mode */ @@ -1733,7 +1743,7 @@ int f2fs_move_node_page(struct page *node_page, int gc_type) goto release_page; } else { /* set page dirty and write it */ - if (!PageWriteback(node_page)) + if (!folio_test_writeback(page_folio(node_page))) set_page_dirty(node_page); } out_page: @@ -2161,7 +2171,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, static bool f2fs_dirty_node_folio(struct address_space *mapping, struct folio *folio) { - trace_f2fs_set_page_dirty(&folio->page, NODE); + trace_f2fs_set_page_dirty(folio, NODE); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e7bf15b8240a..496aee53c38a 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -330,8 +330,7 @@ static int recover_inode(struct inode *inode, struct page *page) F2FS_I(inode)->i_advise = raw->i_advise; F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); f2fs_set_inode_flags(inode); - F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = - le16_to_cpu(raw->i_gc_failures); + F2FS_I(inode)->i_gc_failures = le16_to_cpu(raw->i_gc_failures); recover_inline_flags(inode, raw); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4fd76e867e0a..a0ce3d080f80 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -771,8 +771,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, block_t valid_blocks = get_valid_blocks(sbi, segno, true); - f2fs_bug_on(sbi, unlikely(!valid_blocks || - valid_blocks == CAP_BLKS_PER_SEC(sbi))); + f2fs_bug_on(sbi, + (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + !valid_blocks) || + valid_blocks == CAP_BLKS_PER_SEC(sbi)); if (!IS_CURSEC(sbi, secno)) set_bit(secno, dirty_i->dirty_secmap); @@ -1109,9 +1111,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, dc->error = 0; if (dc->error) - printk_ratelimited( - "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d", - KERN_INFO, sbi->sb->s_id, + f2fs_info_ratelimited(sbi, + "Issue discard(%u, %u, %u) failed, ret: %d", dc->di.lstart, dc->di.start, dc->di.len, dc->error); __detach_discard_cmd(dcc, dc); } @@ -2645,7 +2646,7 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi, } static int is_next_segment_free(struct f2fs_sb_info *sbi, - struct curseg_info *curseg, int type) + struct curseg_info *curseg) { unsigned int segno = curseg->segno + 1; struct free_segmap_info *free_i = FREE_I(sbi); @@ -3073,8 +3074,7 @@ static bool need_new_seg(struct f2fs_sb_info *sbi, int type) if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && curseg->seg_type == CURSEG_WARM_NODE) return true; - if (curseg->alloc_type == LFS && - is_next_segment_free(sbi, curseg, type) && + if (curseg->alloc_type == LFS && is_next_segment_free(sbi, curseg) && likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0)) @@ -3352,8 +3352,14 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) return err; } -int f2fs_rw_hint_to_seg_type(enum rw_hint hint) +int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint) { + if (F2FS_OPTION(sbi).active_logs == 2) + return CURSEG_HOT_DATA; + else if (F2FS_OPTION(sbi).active_logs == 4) + return CURSEG_COLD_DATA; + + /* active_log == 6 */ switch (hint) { case WRITE_LIFE_SHORT: return CURSEG_HOT_DATA; @@ -3364,6 +3370,65 @@ int f2fs_rw_hint_to_seg_type(enum rw_hint hint) } } +/* + * This returns write hints for each segment type. This hints will be + * passed down to block layer as below by default. + * + * User F2FS Block + * ---- ---- ----- + * META WRITE_LIFE_NONE|REQ_META + * HOT_NODE WRITE_LIFE_NONE + * WARM_NODE WRITE_LIFE_MEDIUM + * COLD_NODE WRITE_LIFE_LONG + * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME + * extension list " " + * + * -- buffered io + * COLD_DATA WRITE_LIFE_EXTREME + * HOT_DATA WRITE_LIFE_SHORT + * WARM_DATA WRITE_LIFE_NOT_SET + * + * -- direct io + * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME + * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT + * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET + * WRITE_LIFE_NONE " WRITE_LIFE_NONE + * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM + * WRITE_LIFE_LONG " WRITE_LIFE_LONG + */ +enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, + enum page_type type, enum temp_type temp) +{ + switch (type) { + case DATA: + switch (temp) { + case WARM: + return WRITE_LIFE_NOT_SET; + case HOT: + return WRITE_LIFE_SHORT; + case COLD: + return WRITE_LIFE_EXTREME; + default: + return WRITE_LIFE_NONE; + } + case NODE: + switch (temp) { + case WARM: + return WRITE_LIFE_MEDIUM; + case HOT: + return WRITE_LIFE_NONE; + case COLD: + return WRITE_LIFE_LONG; + default: + return WRITE_LIFE_NONE; + } + case META: + return WRITE_LIFE_NONE; + default: + return WRITE_LIFE_NONE; + } +} + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) @@ -3434,7 +3499,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) is_inode_flag_set(inode, FI_HOT_DATA) || f2fs_is_cow_file(inode)) return CURSEG_HOT_DATA; - return f2fs_rw_hint_to_seg_type(inode->i_write_hint); + return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), + inode->i_write_hint); } else { if (IS_DNODE(fio->page)) return is_cold_node(fio->page) ? CURSEG_WARM_NODE : @@ -3443,6 +3509,15 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) } } +int f2fs_get_segment_temp(int seg_type) +{ + if (IS_HOT(seg_type)) + return HOT; + else if (IS_WARM(seg_type)) + return WARM; + return COLD; +} + static int __get_segment_type(struct f2fs_io_info *fio) { int type = 0; @@ -3461,12 +3536,8 @@ static int __get_segment_type(struct f2fs_io_info *fio) f2fs_bug_on(fio->sbi, true); } - if (IS_HOT(type)) - fio->temp = HOT; - else if (IS_WARM(type)) - fio->temp = WARM; - else - fio->temp = COLD; + fio->temp = f2fs_get_segment_temp(type); + return type; } @@ -3559,6 +3630,8 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (segment_full) { if (type == CURSEG_COLD_DATA_PINNED && !((curseg->segno + 1) % sbi->segs_per_sec)) { + write_sum_page(sbi, curseg->sum_blk, + GET_SUM_BLOCK(sbi, curseg->segno)); reset_curseg_fields(curseg); goto skip_new_segment; } @@ -3612,13 +3685,13 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); return 0; + out_err: *new_blkaddr = NULL_ADDR; up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); f2fs_up_read(&SM_I(sbi)->curseg_lock); return ret; - } void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, @@ -3660,8 +3733,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) &fio->new_blkaddr, sum, type, fio)) { if (fscrypt_inode_uses_fs_layer_crypto(fio->page->mapping->host)) fscrypt_finalize_bounce_page(&fio->encrypted_page); - if (PageWriteback(fio->page)) - end_page_writeback(fio->page); + end_page_writeback(fio->page); if (f2fs_in_warm_node_list(fio->sbi, fio->page)) f2fs_del_fsync_node_entry(fio->sbi, fio->page); goto out; @@ -3904,7 +3976,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered, bool locked) { - if (PageWriteback(page)) { + if (folio_test_writeback(page_folio(page))) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); /* submit cached LFS IO */ @@ -3913,7 +3985,8 @@ void f2fs_wait_on_page_writeback(struct page *page, f2fs_submit_merged_ipu_write(sbi, NULL, page); if (ordered) { wait_on_page_writeback(page); - f2fs_bug_on(sbi, locked && PageWriteback(page)); + f2fs_bug_on(sbi, locked && + folio_test_writeback(page_folio(page))); } else { wait_for_stable_page(page); } @@ -4959,17 +5032,6 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) } #ifdef CONFIG_BLK_DEV_ZONED -static const char *f2fs_zone_status[BLK_ZONE_COND_OFFLINE + 1] = { - [BLK_ZONE_COND_NOT_WP] = "NOT_WP", - [BLK_ZONE_COND_EMPTY] = "EMPTY", - [BLK_ZONE_COND_IMP_OPEN] = "IMPLICIT_OPEN", - [BLK_ZONE_COND_EXP_OPEN] = "EXPLICIT_OPEN", - [BLK_ZONE_COND_CLOSED] = "CLOSED", - [BLK_ZONE_COND_READONLY] = "READONLY", - [BLK_ZONE_COND_FULL] = "FULL", - [BLK_ZONE_COND_OFFLINE] = "OFFLINE", -}; - static int check_zone_write_pointer(struct f2fs_sb_info *sbi, struct f2fs_dev_info *fdev, struct blk_zone *zone) @@ -5000,7 +5062,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) { f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]", zone_segno, valid_block_cnt, - f2fs_zone_status[zone->cond]); + blk_zone_cond_str(zone->cond)); return 0; } @@ -5011,7 +5073,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, if (!valid_block_cnt) { f2fs_notice(sbi, "Zone without valid block has non-zero write " "pointer. Reset the write pointer: cond[%s]", - f2fs_zone_status[zone->cond]); + blk_zone_cond_str(zone->cond)); ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, zone->len >> log_sectors_per_block); if (ret) @@ -5029,7 +5091,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, */ f2fs_notice(sbi, "Valid blocks are not aligned with write " "pointer: valid block[0x%x,0x%x] cond[%s]", - zone_segno, valid_block_cnt, f2fs_zone_status[zone->cond]); + zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond)); nofs_flags = memalloc_nofs_save(); ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a4bc26dfdb1a..1f1b3647a998 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -66,21 +66,31 @@ const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_NO_SEGMENT] = "no free segment", }; -void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, - unsigned int type) +int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate, + unsigned long type) { struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; if (rate) { + if (rate > INT_MAX) + return -EINVAL; atomic_set(&ffi->inject_ops, 0); - ffi->inject_rate = rate; + ffi->inject_rate = (int)rate; } - if (type) - ffi->inject_type = type; + if (type) { + if (type >= BIT(FAULT_MAX)) + return -EINVAL; + ffi->inject_type = (unsigned int)type; + } if (!rate && !type) memset(ffi, 0, sizeof(struct f2fs_fault_info)); + else + f2fs_info(sbi, + "build fault injection attr: rate: %lu, type: 0x%lx", + rate, type); + return 0; } #endif @@ -886,14 +896,17 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_fault_injection: if (args->from && match_int(args, &arg)) return -EINVAL; - f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE); + if (f2fs_build_fault_attr(sbi, arg, + F2FS_ALL_FAULT_TYPE)) + return -EINVAL; set_opt(sbi, FAULT_INJECTION); break; case Opt_fault_type: if (args->from && match_int(args, &arg)) return -EINVAL; - f2fs_build_fault_attr(sbi, 0, arg); + if (f2fs_build_fault_attr(sbi, 0, arg)) + return -EINVAL; set_opt(sbi, FAULT_INJECTION); break; #else @@ -2132,8 +2145,6 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount) F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL; F2FS_OPTION(sbi).errors = MOUNT_ERRORS_CONTINUE; - sbi->sb->s_flags &= ~SB_INLINECRYPT; - set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); @@ -2326,6 +2337,17 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (err) goto restore_opts; +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_has_blkzoned(sbi) && + sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) { + f2fs_err(sbi, + "zoned: max open zones %u is too small, need at least %u open zones", + sbi->max_open_zones, F2FS_OPTION(sbi).active_logs); + err = -EINVAL; + goto restore_opts; + } +#endif + /* flush outstanding errors before changing fs state */ flush_work(&sbi->s_error_work); @@ -2547,6 +2569,11 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) return err; } +static void f2fs_shutdown(struct super_block *sb) +{ + f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false); +} + #ifdef CONFIG_QUOTA static bool f2fs_need_recovery(struct f2fs_sb_info *sbi) { @@ -3146,6 +3173,7 @@ static const struct super_operations f2fs_sops = { .unfreeze_fs = f2fs_unfreeze, .statfs = f2fs_statfs, .remount_fs = f2fs_remount, + .shutdown = f2fs_shutdown, }; #ifdef CONFIG_FS_ENCRYPTION @@ -3441,7 +3469,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, } } - /* Currently, support only 4KB block size */ + /* only support block_size equals to PAGE_SIZE */ if (le32_to_cpu(raw_super->log_blocksize) != F2FS_BLKSIZE_BITS) { f2fs_info(sbi, "Invalid log_blocksize (%u), supports only %u", le32_to_cpu(raw_super->log_blocksize), @@ -3862,11 +3890,24 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) sector_t nr_sectors = bdev_nr_sectors(bdev); struct f2fs_report_zones_args rep_zone_arg; u64 zone_sectors; + unsigned int max_open_zones; int ret; if (!f2fs_sb_has_blkzoned(sbi)) return 0; + if (bdev_is_zoned(FDEV(devi).bdev)) { + max_open_zones = bdev_max_open_zones(bdev); + if (max_open_zones && (max_open_zones < sbi->max_open_zones)) + sbi->max_open_zones = max_open_zones; + if (sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) { + f2fs_err(sbi, + "zoned: max open zones %u is too small, need at least %u open zones", + sbi->max_open_zones, F2FS_OPTION(sbi).active_logs); + return -EINVAL; + } + } + zone_sectors = bdev_zone_sectors(bdev); if (sbi->blocks_per_blkz && sbi->blocks_per_blkz != SECTOR_TO_BLOCK(zone_sectors)) @@ -4131,9 +4172,15 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, if (shutdown) set_sbi_flag(sbi, SBI_IS_SHUTDOWN); - /* continue filesystem operators if errors=continue */ - if (continue_fs || f2fs_readonly(sb)) + /* + * Continue filesystem operators if errors=continue. Should not set + * RO by shutdown, since RO bypasses thaw_super which can hang the + * system. + */ + if (continue_fs || f2fs_readonly(sb) || shutdown) { + f2fs_warn(sbi, "Stopped filesystem due to reason: %d", reason); return; + } f2fs_warn(sbi, "Remounting filesystem read-only"); /* @@ -4180,6 +4227,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev); sbi->aligned_blksize = true; +#ifdef CONFIG_BLK_DEV_ZONED + sbi->max_open_zones = UINT_MAX; +#endif for (i = 0; i < max_devices; i++) { if (i == 0) @@ -4894,12 +4944,6 @@ static int __init init_f2fs_fs(void) { int err; - if (PAGE_SIZE != F2FS_BLKSIZE) { - printk("F2FS not supported on PAGE_SIZE(%lu) != BLOCK_SIZE(%lu)\n", - PAGE_SIZE, F2FS_BLKSIZE); - return -EINVAL; - } - err = init_inodecache(); if (err) goto fail; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index a568ce96cf56..09d3ecfaa4f1 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -484,10 +484,16 @@ static ssize_t __sbi_store(struct f2fs_attr *a, if (ret < 0) return ret; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (a->struct_type == FAULT_INFO_TYPE && t >= BIT(FAULT_MAX)) - return -EINVAL; - if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX) - return -EINVAL; + if (a->struct_type == FAULT_INFO_TYPE) { + if (f2fs_build_fault_attr(sbi, 0, t)) + return -EINVAL; + return count; + } + if (a->struct_type == FAULT_INFO_RATE) { + if (f2fs_build_fault_attr(sbi, t, 0)) + return -EINVAL; + return count; + } #endif if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); @@ -675,6 +681,13 @@ static ssize_t __sbi_store(struct f2fs_attr *a, return count; } + if (!strcmp(a->attr.name, "gc_pin_file_threshold")) { + if (t > MAX_GC_FAILED_PINNED_FILES) + return -EINVAL; + sbi->gc_pin_file_threshold = t; + return count; + } + if (!strcmp(a->attr.name, "gc_reclaimed_segments")) { if (t != 0) return -EINVAL; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a357287eac1e..41d1d71c36ff 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -394,7 +394,8 @@ struct f2fs_nat_block { /* * F2FS uses 4 bytes to represent block address. As a result, supported size of - * disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments. + * disk is 16 TB for a 4K page size and 64 TB for a 16K page size and it equals + * to 16 * 1024 * 1024 / 2 segments. */ #define F2FS_MAX_SEGMENT ((16 * 1024 * 1024) / 2) @@ -424,8 +425,10 @@ struct f2fs_sit_block { /* * For segment summary * - * One summary block contains exactly 512 summary entries, which represents - * exactly one segment by default. Not allow to change the basic units. + * One summary block with 4KB size contains exactly 512 summary entries, which + * represents exactly one segment with 2MB size. + * Similarly, in the case of block with 16KB size, it represents one segment with 8MB size. + * Not allow to change the basic units. * * NOTE: For initializing fields, you must use set_summary * @@ -556,6 +559,7 @@ typedef __le32 f2fs_hash_t; /* * space utilization of regular dentry and inline dentry (w/o extra reservation) + * when block size is 4KB. * regular dentry inline dentry (def) inline dentry (min) * bitmap 1 * 27 = 27 1 * 23 = 23 1 * 1 = 1 * reserved 1 * 3 = 3 1 * 7 = 7 1 * 1 = 1 diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 7ed0fc430dc6..371ba28415f5 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1304,11 +1304,11 @@ TRACE_EVENT(f2fs_write_end, __entry->copied) ); -DECLARE_EVENT_CLASS(f2fs__page, +DECLARE_EVENT_CLASS(f2fs__folio, - TP_PROTO(struct page *page, int type), + TP_PROTO(struct folio *folio, int type), - TP_ARGS(page, type), + TP_ARGS(folio, type), TP_STRUCT__entry( __field(dev_t, dev) @@ -1321,14 +1321,14 @@ DECLARE_EVENT_CLASS(f2fs__page, ), TP_fast_assign( - __entry->dev = page_file_mapping(page)->host->i_sb->s_dev; - __entry->ino = page_file_mapping(page)->host->i_ino; + __entry->dev = folio_file_mapping(folio)->host->i_sb->s_dev; + __entry->ino = folio_file_mapping(folio)->host->i_ino; __entry->type = type; __entry->dir = - S_ISDIR(page_file_mapping(page)->host->i_mode); - __entry->index = page->index; - __entry->dirty = PageDirty(page); - __entry->uptodate = PageUptodate(page); + S_ISDIR(folio_file_mapping(folio)->host->i_mode); + __entry->index = folio_index(folio); + __entry->dirty = folio_test_dirty(folio); + __entry->uptodate = folio_test_uptodate(folio); ), TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, " @@ -1341,32 +1341,32 @@ DECLARE_EVENT_CLASS(f2fs__page, __entry->uptodate) ); -DEFINE_EVENT(f2fs__page, f2fs_writepage, +DEFINE_EVENT(f2fs__folio, f2fs_writepage, - TP_PROTO(struct page *page, int type), + TP_PROTO(struct folio *folio, int type), - TP_ARGS(page, type) + TP_ARGS(folio, type) ); -DEFINE_EVENT(f2fs__page, f2fs_do_write_data_page, +DEFINE_EVENT(f2fs__folio, f2fs_do_write_data_page, - TP_PROTO(struct page *page, int type), + TP_PROTO(struct folio *folio, int type), - TP_ARGS(page, type) + TP_ARGS(folio, type) ); -DEFINE_EVENT(f2fs__page, f2fs_readpage, +DEFINE_EVENT(f2fs__folio, f2fs_readpage, - TP_PROTO(struct page *page, int type), + TP_PROTO(struct folio *folio, int type), - TP_ARGS(page, type) + TP_ARGS(folio, type) ); -DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty, +DEFINE_EVENT(f2fs__folio, f2fs_set_page_dirty, - TP_PROTO(struct page *page, int type), + TP_PROTO(struct folio *folio, int type), - TP_ARGS(page, type) + TP_ARGS(folio, type) ); TRACE_EVENT(f2fs_replace_atomic_write_block,