From e5d2385ed6d8543a8884afc28aee44d46cfe64be Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 3 Jul 2013 10:55:52 +0900 Subject: [PATCH 01/40] f2fs: recover date requested by fdatasync In order to support SQLite that uses fdatasync instead of fsync, we should guarantee the data requested by fdatasync can be recovered after sudden-power- off. So, let's remove the fdatasync condition in f2fs_sync_file. Otherwise, we can restore the data after sudden-power-off due to nonexistence of any fsync mark'ed node blocks. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d2d2b7dbdcc1..157a6357f14c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -147,9 +147,10 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) mutex_lock(&inode->i_mutex); - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - goto out; - + /* + * Both of fdatasync() and fsync() are able to be recovered from + * sudden-power-off. + */ if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; else if (file_wrong_pino(inode)) From d51a7fba254b48aa7090a74d6b1455b6c41bc889 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Thu, 4 Jul 2013 17:12:47 +0900 Subject: [PATCH 02/40] f2fs: add description for fsck.f2fs and dump.f2fs This patch adds some description on fsck.f2fs and dump.f2fs which is recently merged into f2fs-tools. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 43 +++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index b91e2f26b672..0500c198cd08 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -18,8 +18,8 @@ according to its internal geometry or flash memory management scheme, namely FTL F2FS and its tools support various parameters not only for configuring on-disk layout, but also for selecting allocation and cleaning algorithms. -The file system formatting tool, "mkfs.f2fs", is available from the following -git tree: +The following git tree provides the file system formatting tool (mkfs.f2fs), +a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). >> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git For reporting bugs and sending patches, please use the following mailing list: @@ -149,8 +149,12 @@ USAGE # mkfs.f2fs -l label /dev/block_device # mount -t f2fs /dev/block_device /mnt/f2fs -Format options --------------- +mkfs.f2fs +--------- +The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem, +which builds a basic on-disk layout. + +The options consist of: -l [label] : Give a volume label, up to 512 unicode name. -a [0 or 1] : Split start location of each area for heap-based allocation. 1 is set by default, which performs this. @@ -164,6 +168,37 @@ Format options -t [0 or 1] : Disable discard command or not. 1 is set by default, which conducts discard. +fsck.f2fs +--------- +The fsck.f2fs is a tool to check the consistency of an f2fs-formatted +partition, which examines whether the filesystem metadata and user-made data +are cross-referenced correctly or not. +Note that, initial version of the tool does not fix any inconsistency. + +The options consist of: + -d debug level [default:0] + +dump.f2fs +--------- +The dump.f2fs shows the information of specific inode and dumps SSA and SIT to +file. Each file is dump_ssa and dump_sit. + +The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. +It shows on-disk inode information reconized by a given inode number, and is +able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and +./dump_sit respectively. + +The options consist of: + -d debug level [default:0] + -i inode no (hex) + -s [SIT dump segno from #1~#2 (decimal), for all 0~-1] + -a [SSA dump segno from #1~#2 (decimal), for all 0~-1] + +Examples: +# dump.f2fs -i [ino] /dev/sdx +# dump.f2fs -s 0~-1 /dev/sdx (SIT dump) +# dump.f2fs -a 0~-1 /dev/sdx (SSA dump) + ================================================================================ DESIGN ================================================================================ From 5e176d54a6097f5093cacaaf7baeecbe724d32bd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 28 Jun 2013 12:47:01 +0900 Subject: [PATCH 03/40] f2fs: add proc entry to monitor current usage of segments You can monitor valid block counts of whole segments in: /proc/fs/f2fs/sdb1/segment_info. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 467d42d65c48..c7620b973073 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -350,6 +350,7 @@ enum page_type { struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ + struct proc_dir_entry *s_proc; /* proc entry */ struct buffer_head *raw_super_buf; /* buffer head of raw sb */ struct f2fs_super_block *raw_super; /* raw super block pointer */ int s_dirty; /* dirty flag for checkpoint */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 75c7dc363e92..70dbb313a7ca 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #define CREATE_TRACE_POINTS #include +static struct proc_dir_entry *f2fs_proc_root; static struct kmem_cache *f2fs_inode_cachep; enum { @@ -223,6 +225,11 @@ static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + if (sbi->s_proc) { + remove_proc_entry("segment_info", sbi->s_proc); + remove_proc_entry(sb->s_id, f2fs_proc_root); + } + f2fs_destroy_stats(sbi); stop_gc_thread(sbi); @@ -340,6 +347,36 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) return 0; } +static int segment_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); + int i; + + for (i = 0; i < total_segs; i++) { + seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); + if (i != 0 && (i % 10) == 0) + seq_puts(seq, "\n"); + else + seq_puts(seq, " "); + } + return 0; +} + +static int segment_info_open_fs(struct inode *inode, struct file *file) +{ + return single_open(file, segment_info_seq_show, PDE_DATA(inode)); +} + +static const struct file_operations f2fs_seq_segment_info_fops = { + .owner = THIS_MODULE, + .open = segment_info_open_fs, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -766,6 +803,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto fail; + if (f2fs_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); + + if (sbi->s_proc) + proc_create_data("segment_info", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_info_fops, sb); + if (test_opt(sbi, DISCARD)) { struct request_queue *q = bdev_get_queue(sb->s_bdev); if (!blk_queue_discard(q)) @@ -852,12 +896,14 @@ static int __init init_f2fs_fs(void) if (err) goto fail; f2fs_create_root_stats(); + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); fail: return err; } static void __exit exit_f2fs_fs(void) { + remove_proc_entry("fs/f2fs", NULL); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); destroy_checkpoint_caches(); From 963d4f7d7be6f9dba77362941921c33034fee91e Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 12 Jul 2013 14:47:11 +0800 Subject: [PATCH 04/40] f2fs: add a help func F2FS_STAT() to get the f2fs_stat_info Add a help func F2FS_STAT() to get the f2fs_stat_info. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 16 ++++++++-------- fs/f2fs/f2fs.h | 11 ++++++++--- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0d6c6aafb235..fd12b7ff6e2e 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -29,7 +29,7 @@ static DEFINE_MUTEX(f2fs_stat_mutex); static void update_general_status(struct f2fs_sb_info *sbi) { - struct f2fs_stat_info *si = sbi->stat_info; + struct f2fs_stat_info *si = F2FS_STAT(sbi); int i; /* valid check of the segment numbers */ @@ -83,7 +83,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) */ static void update_sit_info(struct f2fs_sb_info *sbi) { - struct f2fs_stat_info *si = sbi->stat_info; + struct f2fs_stat_info *si = F2FS_STAT(sbi); unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; struct sit_info *sit_i = SIT_I(sbi); unsigned int segno, vblocks; @@ -118,7 +118,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi) */ static void update_mem_info(struct f2fs_sb_info *sbi) { - struct f2fs_stat_info *si = sbi->stat_info; + struct f2fs_stat_info *si = F2FS_STAT(sbi); unsigned npages; if (si->base_mem) @@ -305,11 +305,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; - sbi->stat_info = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL); - if (!sbi->stat_info) + si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL); + if (!si) return -ENOMEM; - si = sbi->stat_info; si->all_area_segs = le32_to_cpu(raw_super->segment_count); si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); @@ -319,6 +318,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) si->main_area_zones = si->main_area_sections / le32_to_cpu(raw_super->secs_per_zone); si->sbi = sbi; + sbi->stat_info = si; mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); @@ -329,13 +329,13 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { - struct f2fs_stat_info *si = sbi->stat_info; + struct f2fs_stat_info *si = F2FS_STAT(sbi); mutex_lock(&f2fs_stat_mutex); list_del(&si->stat_list); mutex_unlock(&f2fs_stat_mutex); - kfree(sbi->stat_info); + kfree(si); } void __init f2fs_create_root_stats(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c7620b973073..0a352b658ed8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1113,11 +1113,16 @@ struct f2fs_stat_info { unsigned base_mem, cache_mem; }; +static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_stat_info*)sbi->stat_info; +} + #define stat_inc_call_count(si) ((si)->call_count++) #define stat_inc_seg_count(sbi, type) \ do { \ - struct f2fs_stat_info *si = sbi->stat_info; \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ (si)->tot_segs++; \ if (type == SUM_TYPE_DATA) \ si->data_segs++; \ @@ -1130,14 +1135,14 @@ struct f2fs_stat_info { #define stat_inc_data_blk_count(sbi, blks) \ do { \ - struct f2fs_stat_info *si = sbi->stat_info; \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->data_blks += (blks); \ } while (0) #define stat_inc_node_blk_count(sbi, blks) \ do { \ - struct f2fs_stat_info *si = sbi->stat_info; \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->node_blks += (blks); \ } while (0) From 4559071063270999d016c92a0b9241692cbbb522 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 15 Jul 2013 17:57:38 +0800 Subject: [PATCH 05/40] f2fs: introduce help function F2FS_NODE() Introduce help function F2FS_NODE() to simplify the conversion of node_page to f2fs_node. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 9 +++++++-- fs/f2fs/file.c | 2 +- fs/f2fs/inode.c | 4 ++-- fs/f2fs/node.c | 10 +++++----- fs/f2fs/node.h | 40 ++++++++++++++++------------------------ fs/f2fs/recovery.c | 6 ++---- 8 files changed, 35 insertions(+), 40 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 035f9a345cdf..c73c394c3d8c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -39,7 +39,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) wait_on_page_writeback(node_page); - rn = (struct f2fs_node *)page_address(node_page); + rn = F2FS_NODE(node_page); /* Get physical address of data block */ addr_array = blkaddr_in_node(rn); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 62f0d5977c64..89ecb3785321 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -270,7 +270,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) struct f2fs_node *rn; /* copy name info. to this inode page */ - rn = (struct f2fs_node *)page_address(ipage); + rn = F2FS_NODE(ipage); rn->i.i_namelen = cpu_to_le32(name->len); memcpy(rn->i.i_name, name->name, name->len); set_page_dirty(ipage); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0a352b658ed8..2dfd584bf79f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -455,6 +455,11 @@ static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) return (struct f2fs_checkpoint *)(sbi->ckpt); } +static inline struct f2fs_node *F2FS_NODE(struct page *page) +{ + return (struct f2fs_node *)page_address(page); +} + static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) { return (struct f2fs_nm_info *)(sbi->nm_info); @@ -813,7 +818,7 @@ static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, static inline bool IS_INODE(struct page *page) { - struct f2fs_node *p = (struct f2fs_node *)page_address(page); + struct f2fs_node *p = F2FS_NODE(page); return RAW_IS_INODE(p); } @@ -827,7 +832,7 @@ static inline block_t datablock_addr(struct page *node_page, { struct f2fs_node *raw_node; __le32 *addr_array; - raw_node = (struct f2fs_node *)page_address(node_page); + raw_node = F2FS_NODE(node_page); addr_array = blkaddr_in_node(raw_node); return le32_to_cpu(addr_array[offset]); } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 157a6357f14c..65ca3b3e5806 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -206,7 +206,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) struct f2fs_node *raw_node; __le32 *addr; - raw_node = page_address(dn->node_page); + raw_node = F2FS_NODE(dn->node_page); addr = blkaddr_in_node(raw_node) + ofs; for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2b2d45d19e3e..debf74308045 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -56,7 +56,7 @@ static int do_read_inode(struct inode *inode) if (IS_ERR(node_page)) return PTR_ERR(node_page); - rn = page_address(node_page); + rn = F2FS_NODE(node_page); ri = &(rn->i); inode->i_mode = le16_to_cpu(ri->i_mode); @@ -153,7 +153,7 @@ void update_inode(struct inode *inode, struct page *node_page) wait_on_page_writeback(node_page); - rn = page_address(node_page); + rn = F2FS_NODE(node_page); ri = &(rn->i); ri->i_mode = cpu_to_le16(inode->i_mode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b418aee09573..f5172e271d46 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -565,7 +565,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, return PTR_ERR(page); } - rn = (struct f2fs_node *)page_address(page); + rn = F2FS_NODE(page); if (depth < 3) { for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { child_nid = le32_to_cpu(rn->in.nid[i]); @@ -698,7 +698,7 @@ restart: set_new_dnode(&dn, inode, page, NULL, 0); unlock_page(page); - rn = page_address(page); + rn = F2FS_NODE(page); switch (level) { case 0: case 1: @@ -1484,8 +1484,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); - src = (struct f2fs_node *)page_address(page); - dst = (struct f2fs_node *)page_address(ipage); + src = F2FS_NODE(page); + dst = F2FS_NODE(ipage); memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); dst->i.i_size = 0; @@ -1535,7 +1535,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi, goto out; lock_page(page); - rn = (struct f2fs_node *)page_address(page); + rn = F2FS_NODE(page); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index c65fb4f4230f..87349c4ea0ef 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -155,8 +155,7 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) static inline void fill_node_footer(struct page *page, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(page); if (reset) memset(rn, 0, sizeof(*rn)); rn->footer.nid = cpu_to_le32(nid); @@ -166,10 +165,8 @@ static inline void fill_node_footer(struct page *page, nid_t nid, static inline void copy_node_footer(struct page *dst, struct page *src) { - void *src_addr = page_address(src); - void *dst_addr = page_address(dst); - struct f2fs_node *src_rn = (struct f2fs_node *)src_addr; - struct f2fs_node *dst_rn = (struct f2fs_node *)dst_addr; + struct f2fs_node *src_rn = F2FS_NODE(src); + struct f2fs_node *dst_rn = F2FS_NODE(dst); memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); } @@ -177,45 +174,40 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(page); + rn->footer.cp_ver = ckpt->checkpoint_ver; rn->footer.next_blkaddr = cpu_to_le32(blkaddr); } static inline nid_t ino_of_node(struct page *node_page) { - void *kaddr = page_address(node_page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(node_page); return le32_to_cpu(rn->footer.ino); } static inline nid_t nid_of_node(struct page *node_page) { - void *kaddr = page_address(node_page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(node_page); return le32_to_cpu(rn->footer.nid); } static inline unsigned int ofs_of_node(struct page *node_page) { - void *kaddr = page_address(node_page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(node_page); unsigned flag = le32_to_cpu(rn->footer.flag); return flag >> OFFSET_BIT_SHIFT; } static inline unsigned long long cpver_of_node(struct page *node_page) { - void *kaddr = page_address(node_page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(node_page); return le64_to_cpu(rn->footer.cp_ver); } static inline block_t next_blkaddr_of_node(struct page *node_page) { - void *kaddr = page_address(node_page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(node_page); return le32_to_cpu(rn->footer.next_blkaddr); } @@ -250,7 +242,7 @@ static inline bool IS_DNODE(struct page *node_page) static inline void set_nid(struct page *p, int off, nid_t nid, bool i) { - struct f2fs_node *rn = (struct f2fs_node *)page_address(p); + struct f2fs_node *rn = F2FS_NODE(p); wait_on_page_writeback(p); @@ -263,7 +255,8 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i) static inline nid_t get_nid(struct page *p, int off, bool i) { - struct f2fs_node *rn = (struct f2fs_node *)page_address(p); + struct f2fs_node *rn = F2FS_NODE(p); + if (i) return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); return le32_to_cpu(rn->in.nid[off]); @@ -314,8 +307,7 @@ static inline void clear_cold_data(struct page *page) static inline int is_node(struct page *page, int type) { - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(page); return le32_to_cpu(rn->footer.flag) & (1 << type); } @@ -325,7 +317,7 @@ static inline int is_node(struct page *page, int type) static inline void set_cold_node(struct inode *inode, struct page *page) { - struct f2fs_node *rn = (struct f2fs_node *)page_address(page); + struct f2fs_node *rn = F2FS_NODE(page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (S_ISDIR(inode->i_mode)) @@ -337,7 +329,7 @@ static inline void set_cold_node(struct inode *inode, struct page *page) static inline void set_mark(struct page *page, int mark, int type) { - struct f2fs_node *rn = (struct f2fs_node *)page_address(page); + struct f2fs_node *rn = F2FS_NODE(page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) flag |= (0x1 << type); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index d56d951c2253..639eb3465286 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -40,8 +40,7 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, static int recover_dentry(struct page *ipage, struct inode *inode) { - void *kaddr = page_address(ipage); - struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; + struct f2fs_node *raw_node = F2FS_NODE(ipage); struct f2fs_inode *raw_inode = &(raw_node->i); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; @@ -93,8 +92,7 @@ out: static int recover_inode(struct inode *inode, struct page *node_page) { - void *kaddr = page_address(node_page); - struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; + struct f2fs_node *raw_node = F2FS_NODE(node_page); struct f2fs_inode *raw_inode = &(raw_node->i); if (!IS_INODE(node_page)) From 1cd14cafc694bcedc5017a4f0dcb3c3faddec622 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 18 Jul 2013 18:02:31 +0900 Subject: [PATCH 06/40] f2fs: update file name in the inode block during f2fs_rename The error is reproducible by: 0. mkfs.f2fs /dev/sdb1 & mount 1. touch test1 2. touch test2 3. mv test1 test2 4. umount 5. dumpt.f2fs -i 4 /dev/sdb1 After this, when we retrieve the inode->i_name of test2 by dump.f2fs, we get test1 instead of test2. This is because f2fs didn't update the file name during the f2fs_rename. So, this patch fixes that. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 15 +++++++++++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/namei.c | 5 +++++ 3 files changed, 21 insertions(+) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 89ecb3785321..d1bb2606b313 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -276,6 +276,21 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } +int update_dent_inode(struct inode *inode, const struct qstr *name) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) + return PTR_ERR(page); + + init_dent_inode(name, page); + f2fs_put_page(page, 1); + + return 0; +} + static int make_empty_dir(struct inode *inode, struct inode *parent, struct page *page) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2dfd584bf79f..a6858c75259d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -953,6 +953,7 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); +int update_dent_inode(struct inode *, const struct qstr *); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); int f2fs_make_empty(struct inode *, struct inode *); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 64c07169df05..32972787f12f 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -427,6 +427,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_entry) goto out_dir; + if (update_dent_inode(old_inode, &new_dentry->d_name)) { + f2fs_put_page(new_page, 1); + goto out_dir; + } + f2fs_set_link(new_dir, new_entry, new_page, old_inode); new_inode->i_ctime = CURRENT_TIME; From f0947e5ccedb749b2f701aa5279cd8f848a720a1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 22 Jul 2013 22:12:56 +0900 Subject: [PATCH 07/40] f2fs: fix i_name during f2fs_sync_file As similar as the i_pino fix, i_name also should be fixed when i_nlink is 1. The errorneous scenario is like this. 1. touch test1 2. link test1 test2 3. unlink test2 4. fsync test1 After this, i_name should be test1. CC: Al Viro Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 65ca3b3e5806..c2deb27ffb72 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -112,11 +112,13 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) if (!dentry) return 0; - inode = igrab(dentry->d_parent->d_inode); - dput(dentry); + if (update_dent_inode(inode, &dentry->d_name)) { + dput(dentry); + return 0; + } - *pino = inode->i_ino; - iput(inode); + *pino = parent_ino(dentry); + dput(dentry); return 1; } From 2d219c518882d2b2bac77742a6a8979c9dad051a Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 22 Jul 2013 16:33:32 +0800 Subject: [PATCH 08/40] f2fs: use seq_puts()/seq_putc() rather than seq_printf() where possible For string without format specifiers, using seq_puts()/seq_putc() instead of seq_printf(). Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index fd12b7ff6e2e..a84b0a8e6854 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -253,21 +253,21 @@ static int stat_show(struct seq_file *s, void *v) si->nats, NM_WOUT_THRESHOLD); seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", si->sits, si->fnids); - seq_printf(s, "\nDistribution of User Blocks:"); - seq_printf(s, " [ valid | invalid | free ]\n"); - seq_printf(s, " ["); + seq_puts(s, "\nDistribution of User Blocks:"); + seq_puts(s, " [ valid | invalid | free ]\n"); + seq_puts(s, " ["); for (j = 0; j < si->util_valid; j++) - seq_printf(s, "-"); - seq_printf(s, "|"); + seq_putc(s, '-'); + seq_putc(s, '|'); for (j = 0; j < si->util_invalid; j++) - seq_printf(s, "-"); - seq_printf(s, "|"); + seq_putc(s, '-'); + seq_putc(s, '|'); for (j = 0; j < si->util_free; j++) - seq_printf(s, "-"); - seq_printf(s, "]\n\n"); + seq_putc(s, '-'); + seq_puts(s, "]\n\n"); seq_printf(s, "SSR: %u blocks in %u segments\n", si->block_count[SSR], si->segment_count[SSR]); seq_printf(s, "LFS: %u blocks in %u segments\n", From 60ed9a0f53c55d4be3882f7522e8349a5011a1d5 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 19 Jul 2013 16:24:06 +0800 Subject: [PATCH 09/40] f2fs: use list_for_each rather than list_for_each_safe, in remove_orphan_inode() As we remove the target single node, so list_for_each is enought, in order to clean up, we use list_for_each_entry instead. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 66a6b85a51d8..fe91773de130 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -237,13 +237,12 @@ out: void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { - struct list_head *this, *next, *head; + struct list_head *head; struct orphan_inode_entry *orphan; mutex_lock(&sbi->orphan_inode_mutex); head = &sbi->orphan_inode_list; - list_for_each_safe(this, next, head) { - orphan = list_entry(this, struct orphan_inode_entry, list); + list_for_each_entry(orphan, head, list) { if (orphan->ino == ino) { list_del(&orphan->list); kmem_cache_free(orphan_entry_slab, orphan); From d8207f69589c74037128ff6c9e1a44223fad3b7c Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Thu, 25 Jul 2013 11:30:01 +0800 Subject: [PATCH 10/40] f2fs: move bio_private allocation out of f2fs_bio_alloc() bio->bi_private is not always needed. As in the reading data path, end_read_io does not need bio_private for further using, so moving bio_private allocation out of f2fs_bio_alloc(). Alloc it in the submit_write_page(), and ignore it in the f2fs_readpage(). Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 - fs/f2fs/segment.c | 19 +++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c73c394c3d8c..19cd7c6e964c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -365,7 +365,6 @@ static void read_end_io(struct bio *bio, int err) } unlock_page(page); } while (bvec >= bio->bi_io_vec); - kfree(bio->bi_private); bio_put(bio); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a86d125a9885..9b74ae2137d1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -611,18 +611,12 @@ static void f2fs_end_io_write(struct bio *bio, int err) struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages) { struct bio *bio; - struct bio_private *priv; -retry: - priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); - if (!priv) { - cond_resched(); - goto retry; - } /* No failure on bio allocation */ bio = bio_alloc(GFP_NOIO, npages); bio->bi_bdev = bdev; - bio->bi_private = priv; + bio->bi_private = NULL; + return bio; } @@ -681,8 +675,17 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, do_submit_bio(sbi, type, false); alloc_new: if (sbi->bio[type] == NULL) { + struct bio_private *priv; +retry: + priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); + if (!priv) { + cond_resched(); + goto retry; + } + sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + sbi->bio[type]->bi_private = priv; /* * The end_io will be assigned at the sumbission phase. * Until then, let bio_add_page() merge consecutive IOs as much From cbd56e7d20d7188d62a85aa6986a7b2c8e755ab5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 30 Jul 2013 11:36:53 +0900 Subject: [PATCH 11/40] f2fs: fix handling orphan inodes This patch fixes mishandling of the sbi->n_orphans variable. If users request lots of f2fs_unlink(), check_orphan_space() could be contended. In such the case, sbi->n_orphans can be read incorrectly so that f2fs_unlink() would fall into the wrong state which results in the failure of add_orphan_inode(). So, let's increment sbi->n_orphans virtually prior to the actual orphan inode stuffs. After that, let's release sbi->n_orphans by calling release_orphan_inode or remove_orphan_inode. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 13 ++++++++++--- fs/f2fs/dir.c | 2 ++ fs/f2fs/f2fs.h | 3 ++- fs/f2fs/namei.c | 19 ++++++++++++++----- 4 files changed, 28 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index fe91773de130..c5a5c390a5cc 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -182,7 +182,7 @@ const struct address_space_operations f2fs_meta_aops = { .set_page_dirty = f2fs_set_meta_page_dirty, }; -int check_orphan_space(struct f2fs_sb_info *sbi) +int acquire_orphan_inode(struct f2fs_sb_info *sbi) { unsigned int max_orphans; int err = 0; @@ -197,10 +197,19 @@ int check_orphan_space(struct f2fs_sb_info *sbi) mutex_lock(&sbi->orphan_inode_mutex); if (sbi->n_orphans >= max_orphans) err = -ENOSPC; + else + sbi->n_orphans++; mutex_unlock(&sbi->orphan_inode_mutex); return err; } +void release_orphan_inode(struct f2fs_sb_info *sbi) +{ + mutex_lock(&sbi->orphan_inode_mutex); + sbi->n_orphans--; + mutex_unlock(&sbi->orphan_inode_mutex); +} + void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct list_head *head, *this; @@ -229,8 +238,6 @@ retry: list_add(&new->list, this->prev); else list_add_tail(&new->list, head); - - sbi->n_orphans++; out: mutex_unlock(&sbi->orphan_inode_mutex); } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index d1bb2606b313..384c6daf9a89 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -572,6 +572,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, if (inode->i_nlink == 0) add_orphan_inode(sbi, inode->i_ino); + else + release_orphan_inode(sbi); } if (bit_pos == NR_DENTRY_IN_BLOCK) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a6858c75259d..78777cdb89de 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1044,7 +1044,8 @@ void destroy_segment_manager(struct f2fs_sb_info *); struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); -int check_orphan_space(struct f2fs_sb_info *); +int acquire_orphan_inode(struct f2fs_sb_info *); +void release_orphan_inode(struct f2fs_sb_info *); void add_orphan_inode(struct f2fs_sb_info *, nid_t); void remove_orphan_inode(struct f2fs_sb_info *, nid_t); int recover_orphan_inodes(struct f2fs_sb_info *); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 32972787f12f..4e475181280c 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -239,7 +239,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (!de) goto fail; - err = check_orphan_space(sbi); + err = acquire_orphan_inode(sbi); if (err) { kunmap(page); f2fs_put_page(page, 0); @@ -393,7 +393,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode = old_dentry->d_inode; struct inode *new_inode = new_dentry->d_inode; struct page *old_dir_page; - struct page *old_page; + struct page *old_page, *new_page; struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; @@ -415,7 +415,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, ilock = mutex_lock_op(sbi); if (new_inode) { - struct page *new_page; err = -ENOTEMPTY; if (old_dir_entry && !f2fs_empty_dir(new_inode)) @@ -427,9 +426,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_entry) goto out_dir; + err = acquire_orphan_inode(sbi); + if (err) + goto put_out_dir; + if (update_dent_inode(old_inode, &new_dentry->d_name)) { - f2fs_put_page(new_page, 1); - goto out_dir; + release_orphan_inode(sbi); + goto put_out_dir; } f2fs_set_link(new_dir, new_entry, new_page, old_inode); @@ -438,8 +441,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dir_entry) drop_nlink(new_inode); drop_nlink(new_inode); + if (!new_inode->i_nlink) add_orphan_inode(sbi, new_inode->i_ino); + else + release_orphan_inode(sbi); + update_inode_page(new_inode); } else { err = f2fs_add_link(new_dentry, old_inode); @@ -472,6 +479,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, mutex_unlock_op(sbi, ilock); return 0; +put_out_dir: + f2fs_put_page(new_page, 1); out_dir: if (old_dir_entry) { kunmap(old_dir_page); From f0c5e565bb05a4cd6105bb197c56078462252e78 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 31 Jul 2013 11:44:13 +0300 Subject: [PATCH 12/40] f2fs: remove an unneeded kfree(NULL) This kfree() is no longer needed after a79dc083d7 "f2fs: move bio_private allocation out of f2fs_bio_alloc()". The "bio->bi_private" is NULL here so it's a no-op. Signed-off-by: Dan Carpenter Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 19cd7c6e964c..027341cacd2b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -390,7 +390,6 @@ int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, bio->bi_end_io = read_end_io; if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { - kfree(bio->bi_private); bio_put(bio); up_read(&sbi->bio_sem); f2fs_put_page(page, 1); From b59d0bae6ca30c496f298881616258f9cde0d9c6 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 4 Aug 2013 23:09:40 +0900 Subject: [PATCH 13/40] f2fs: add sysfs support for controlling the gc_thread Add sysfs entries to control the timing parameters for f2fs gc thread. Various Sysfs options introduced are: gc_min_sleep_time: Min Sleep time for GC in ms gc_max_sleep_time: Max Sleep time for GC in ms gc_no_gc_sleep_time: Default Sleep time for GC in ms Cc: Gu Zheng Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Reviewed-by: Gu Zheng [Jaegeuk Kim: fix an umount bug and some minor changes] Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 20 ++++ Documentation/filesystems/f2fs.txt | 26 +++++ fs/f2fs/f2fs.h | 4 + fs/f2fs/gc.c | 17 ++-- fs/f2fs/gc.h | 33 ++++--- fs/f2fs/super.c | 123 ++++++++++++++++++++++++ 6 files changed, 203 insertions(+), 20 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-fs-f2fs diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs new file mode 100644 index 000000000000..98e53a09530a --- /dev/null +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -0,0 +1,20 @@ +What: /sys/fs/f2fs//gc_max_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the maximun sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_min_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the minimum sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_no_gc_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the default sleep time for gc_thread. Time + is in milliseconds. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 0500c198cd08..5daf3bb2eef9 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -132,6 +132,32 @@ f2fs. Each file shows the whole f2fs information. - average SIT information about whole segments - current memory footprint consumed by f2fs. +================================================================================ +SYSFS ENTRIES +================================================================================ + +Information about mounted f2f2 file systems can be found in +/sys/fs/f2fs. Each mounted filesystem will have a directory in +/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda). +The files in each per-device directory are shown in table below. + +Files in /sys/fs/f2fs/ +(see also Documentation/ABI/testing/sysfs-fs-f2fs) +.............................................................................. + File Content + + gc_max_sleep_time This tuning parameter controls the maximum sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_min_sleep_time This tuning parameter controls the minimum sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_no_gc_sleep_time This tuning parameter controls the default sleep + time for the garbage collection thread. Time is + in milliseconds. + ================================================================================ USAGE ================================================================================ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 78777cdb89de..63813befdd82 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -430,6 +430,10 @@ struct f2fs_sb_info { #endif unsigned int last_victim[2]; /* last victim segment # */ spinlock_t stat_lock; /* lock for stat operations */ + + /* For sysfs suppport */ + struct kobject s_kobj; + struct completion s_kobj_unregister; }; /* diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 35f9b1a196aa..60d4f674efa7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -29,10 +29,11 @@ static struct kmem_cache *winode_slab; static int gc_thread_func(void *data) { struct f2fs_sb_info *sbi = data; + struct f2fs_gc_kthread *gc_th = sbi->gc_thread; wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; long wait_ms; - wait_ms = GC_THREAD_MIN_SLEEP_TIME; + wait_ms = gc_th->min_sleep_time; do { if (try_to_freeze()) @@ -45,7 +46,7 @@ static int gc_thread_func(void *data) break; if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { - wait_ms = GC_THREAD_MAX_SLEEP_TIME; + wait_ms = increase_sleep_time(gc_th, wait_ms); continue; } @@ -66,15 +67,15 @@ static int gc_thread_func(void *data) continue; if (!is_idle(sbi)) { - wait_ms = increase_sleep_time(wait_ms); + wait_ms = increase_sleep_time(gc_th, wait_ms); mutex_unlock(&sbi->gc_mutex); continue; } if (has_enough_invalid_blocks(sbi)) - wait_ms = decrease_sleep_time(wait_ms); + wait_ms = decrease_sleep_time(gc_th, wait_ms); else - wait_ms = increase_sleep_time(wait_ms); + wait_ms = increase_sleep_time(gc_th, wait_ms); #ifdef CONFIG_F2FS_STAT_FS sbi->bg_gc++; @@ -82,7 +83,7 @@ static int gc_thread_func(void *data) /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi)) - wait_ms = GC_THREAD_NOGC_SLEEP_TIME; + wait_ms = gc_th->no_gc_sleep_time; } while (!kthread_should_stop()); return 0; } @@ -101,6 +102,10 @@ int start_gc_thread(struct f2fs_sb_info *sbi) goto out; } + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 2c6a6bd08322..f4bf44c9deda 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -13,9 +13,9 @@ * whether IO subsystem is idle * or not */ -#define GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ -#define GC_THREAD_MAX_SLEEP_TIME 60000 -#define GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ +#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ +#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 +#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ @@ -25,6 +25,11 @@ struct f2fs_gc_kthread { struct task_struct *f2fs_gc_task; wait_queue_head_t gc_wait_queue_head; + + /* for gc sleep time */ + unsigned int min_sleep_time; + unsigned int max_sleep_time; + unsigned int no_gc_sleep_time; }; struct inode_entry { @@ -56,25 +61,25 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; } -static inline long increase_sleep_time(long wait) +static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) { - if (wait == GC_THREAD_NOGC_SLEEP_TIME) + if (wait == gc_th->no_gc_sleep_time) return wait; - wait += GC_THREAD_MIN_SLEEP_TIME; - if (wait > GC_THREAD_MAX_SLEEP_TIME) - wait = GC_THREAD_MAX_SLEEP_TIME; + wait += gc_th->min_sleep_time; + if (wait > gc_th->max_sleep_time) + wait = gc_th->max_sleep_time; return wait; } -static inline long decrease_sleep_time(long wait) +static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) { - if (wait == GC_THREAD_NOGC_SLEEP_TIME) - wait = GC_THREAD_MAX_SLEEP_TIME; + if (wait == gc_th->no_gc_sleep_time) + wait = gc_th->max_sleep_time; - wait -= GC_THREAD_MIN_SLEEP_TIME; - if (wait <= GC_THREAD_MIN_SLEEP_TIME) - wait = GC_THREAD_MIN_SLEEP_TIME; + wait -= gc_th->min_sleep_time; + if (wait <= gc_th->min_sleep_time) + wait = gc_th->min_sleep_time; return wait; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 70dbb313a7ca..e161a24fbf39 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -23,17 +23,21 @@ #include #include #include +#include +#include #include "f2fs.h" #include "node.h" #include "segment.h" #include "xattr.h" +#include "gc.h" #define CREATE_TRACE_POINTS #include static struct proc_dir_entry *f2fs_proc_root; static struct kmem_cache *f2fs_inode_cachep; +static struct kset *f2fs_kset; enum { Opt_gc_background, @@ -59,6 +63,111 @@ static match_table_t f2fs_tokens = { {Opt_err, NULL}, }; +/* Sysfs support for f2fs */ +struct f2fs_attr { + struct attribute attr; + ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); + ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, + const char *, size_t); + int offset; +}; + +static ssize_t f2fs_sbi_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned int *ui; + + if (!gc_kth) + return -EINVAL; + + ui = (unsigned int *)(((char *)gc_kth) + a->offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + +static ssize_t f2fs_sbi_store(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, + const char *buf, size_t count) +{ + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned long t; + unsigned int *ui; + ssize_t ret; + + if (!gc_kth) + return -EINVAL; + + ui = (unsigned int *)(((char *)gc_kth) + a->offset); + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret < 0) + return ret; + *ui = t; + return count; +} + +static ssize_t f2fs_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->show ? a->show(a, sbi, buf) : 0; +} + +static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->store ? a->store(a, sbi, buf, len) : 0; +} + +static void f2fs_sb_release(struct kobject *kobj) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .offset = offsetof(struct f2fs_gc_kthread, _elname), \ +} + +#define F2FS_RW_ATTR(name, elname) \ + F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname) + +F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); +F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); +F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); + +#define ATTR_LIST(name) (&f2fs_attr_##name.attr) +static struct attribute *f2fs_attrs[] = { + ATTR_LIST(gc_min_sleep_time), + ATTR_LIST(gc_max_sleep_time), + ATTR_LIST(gc_no_gc_sleep_time), + NULL, +}; + +static const struct sysfs_ops f2fs_attr_ops = { + .show = f2fs_attr_show, + .store = f2fs_attr_store, +}; + +static struct kobj_type f2fs_ktype = { + .default_attrs = f2fs_attrs, + .sysfs_ops = &f2fs_attr_ops, + .release = f2fs_sb_release, +}; + void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) { struct va_format vaf; @@ -229,6 +338,7 @@ static void f2fs_put_super(struct super_block *sb) remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry(sb->s_id, f2fs_proc_root); } + kobject_del(&sbi->s_kobj); f2fs_destroy_stats(sbi); stop_gc_thread(sbi); @@ -243,6 +353,8 @@ static void f2fs_put_super(struct super_block *sb) destroy_segment_manager(sbi); kfree(sbi->ckpt); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); sb->s_fs_info = NULL; brelse(sbi->raw_super_buf); @@ -818,6 +930,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) "the device does not support discard"); } + sbi->s_kobj.kset = f2fs_kset; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, + "%s", sb->s_id); + if (err) + goto fail; + return 0; fail: stop_gc_thread(sbi); @@ -892,6 +1011,9 @@ static int __init init_f2fs_fs(void) err = create_checkpoint_caches(); if (err) goto fail; + f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); + if (!f2fs_kset) + goto fail; err = register_filesystem(&f2fs_fs_type); if (err) goto fail; @@ -910,6 +1032,7 @@ static void __exit exit_f2fs_fs(void) destroy_gc_caches(); destroy_node_manager_caches(); destroy_inodecache(); + kset_unregister(f2fs_kset); } module_init(init_f2fs_fs) From d2dc095f4280ad5fdea33769e8e119fd16648426 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 4 Aug 2013 23:10:15 +0900 Subject: [PATCH 14/40] f2fs: add sysfs entries to select the gc policy Add sysfs entry gc_idle to control the gc policy. Where gc_idle = 1 corresponds to selecting a cost benefit approach, while gc_idle = 2 corresponds to selecting a greedy approach to garbage collection. The selection is mutually exclusive one approach will work at any point. If gc_idle = 0, then this option is disabled. Cc: Gu Zheng Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Reviewed-by: Gu Zheng [Jaegeuk Kim: change the select_gc_type() flow slightly] Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ Documentation/filesystems/f2fs.txt | 6 ++++++ fs/f2fs/gc.c | 16 +++++++++++++--- fs/f2fs/gc.h | 3 +++ fs/f2fs/super.c | 2 ++ 5 files changed, 30 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 98e53a09530a..31942efcaf0e 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -18,3 +18,9 @@ Contact: "Namjae Jeon" Description: Controls the default sleep time for gc_thread. Time is in milliseconds. + +What: /sys/fs/f2fs//gc_idle +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the victim selection policy for garbage collection. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 5daf3bb2eef9..3cd27bed6349 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -158,6 +158,12 @@ Files in /sys/fs/f2fs/ time for the garbage collection thread. Time is in milliseconds. + gc_idle This parameter controls the selection of victim + policy for garbage collection. Setting gc_idle = 0 + (default) will disable this option. Setting + gc_idle = 1 will select the Cost Benefit approach + & setting gc_idle = 2 will select the greedy aproach. + ================================================================================ USAGE ================================================================================ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 60d4f674efa7..d286d8be8e68 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -106,6 +106,8 @@ int start_gc_thread(struct f2fs_sb_info *sbi) gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + gc_th->gc_idle = 0; + sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, @@ -130,9 +132,17 @@ void stop_gc_thread(struct f2fs_sb_info *sbi) sbi->gc_thread = NULL; } -static int select_gc_type(int gc_type) +static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) { - return (gc_type == BG_GC) ? GC_CB : GC_GREEDY; + int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; + + if (gc_th && gc_th->gc_idle) { + if (gc_th->gc_idle == 1) + gc_mode = GC_CB; + else if (gc_th->gc_idle == 2) + gc_mode = GC_GREEDY; + } + return gc_mode; } static void select_policy(struct f2fs_sb_info *sbi, int gc_type, @@ -145,7 +155,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->dirty_segmap = dirty_i->dirty_segmap[type]; p->ofs_unit = 1; } else { - p->gc_mode = select_gc_type(gc_type); + p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; p->ofs_unit = sbi->segs_per_sec; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index f4bf44c9deda..c22dee9f1422 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -30,6 +30,9 @@ struct f2fs_gc_kthread { unsigned int min_sleep_time; unsigned int max_sleep_time; unsigned int no_gc_sleep_time; + + /* for changing gc mode */ + unsigned int gc_idle; }; struct inode_entry { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e161a24fbf39..94c0e2049546 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -148,12 +148,14 @@ static struct f2fs_attr f2fs_attr_##_name = { \ F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); +F2FS_RW_ATTR(gc_idle, gc_idle); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_min_sleep_time), ATTR_LIST(gc_max_sleep_time), ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_idle), NULL, }; From df273efc36024a9ea97fd687d638a70c3ea8c37a Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 4 Aug 2013 23:10:28 +0900 Subject: [PATCH 15/40] f2fs: remove redundant code from f2fs_write_begin This code is being used for nobh_write_end() function. But since now f2fs_write_end function is added so there is no need for this code. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 027341cacd2b..f458883af815 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -634,9 +634,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, int err = 0; int ilock; - /* for nobh_write_end */ - *fsdata = NULL; - f2fs_balance_fs(sbi); repeat: page = grab_cache_page_write_begin(mapping, index, flags); From a569469e967022d9ceeaa4b73619f96614087d2d Mon Sep 17 00:00:00 2001 From: Jin Xu Date: Mon, 5 Aug 2013 20:02:04 +0800 Subject: [PATCH 16/40] f2fs: fix a deadlock in fsync This patch fixes a deadlock bug that occurs quite often when there are concurrent write and fsync on a same file. Following is the simplified call trace when tasks get hung. fsync thread: - f2fs_sync_file ... - f2fs_write_data_pages ... - update_extent_cache ... - update_inode - wait_on_page_writeback bdi writeback thread - __writeback_single_inode - f2fs_write_data_pages - mutex_lock(sbi->writepages) The deadlock happens when the fsync thread waits on a inode page that has been added to the f2fs' cached bio sbi->bio[NODE], and unfortunately, no one else could be able to submit the cached bio to block layer for writeback. This is because the fsync thread already hold a sbi->fs_lock and the sbi->writepages lock, causing the bdi thread being blocked when attempt to write data pages for the same inode. At the same time, f2fs_gc thread does not notice the situation and could not help. Even the sync syscall gets blocked. To fix it, we could submit the cached bio first before waiting on a inode page that is being written back. Signed-off-by: Jin Xu [Jaegeuk Kim: add more cases to use f2fs_wait_on_page_writeback] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/gc.c | 8 ++------ fs/f2fs/inode.c | 2 +- fs/f2fs/segment.c | 10 ++++++++++ 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f458883af815..a7eb52925723 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -37,7 +37,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) struct page *node_page = dn->node_page; unsigned int ofs_in_node = dn->ofs_in_node; - wait_on_page_writeback(node_page); + f2fs_wait_on_page_writeback(node_page, NODE, false); rn = F2FS_NODE(node_page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 63813befdd82..13db10b70e66 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1023,7 +1023,8 @@ int npages_for_summary_flush(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); struct bio *f2fs_bio_alloc(struct block_device *, int); -void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync); +void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool); +void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, block_t, block_t *); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d286d8be8e68..e6b3ffd5ff6a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -422,8 +422,7 @@ next_step: /* set page dirty and write it */ if (gc_type == FG_GC) { - f2fs_submit_bio(sbi, NODE, true); - wait_on_page_writeback(node_page); + f2fs_wait_on_page_writeback(node_page, NODE, true); set_page_dirty(node_page); } else { if (!PageWriteback(node_page)) @@ -523,10 +522,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) } else { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - if (PageWriteback(page)) { - f2fs_submit_bio(sbi, DATA, true); - wait_on_page_writeback(page); - } + f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page) && S_ISDIR(inode->i_mode)) { diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index debf74308045..9ab81e7472c5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -151,7 +151,7 @@ void update_inode(struct inode *inode, struct page *node_page) struct f2fs_node *rn; struct f2fs_inode *ri; - wait_on_page_writeback(node_page); + f2fs_wait_on_page_writeback(node_page, NODE, false); rn = F2FS_NODE(node_page); ri = &(rn->i); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9b74ae2137d1..68e344f9e042 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -705,6 +705,16 @@ retry: trace_f2fs_submit_write_page(page, blk_addr, type); } +void f2fs_wait_on_page_writeback(struct page *page, + enum page_type type, bool sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + if (PageWriteback(page)) { + f2fs_submit_bio(sbi, type, sync); + wait_on_page_writeback(page); + } +} + static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); From c2d715d144af7209d035d0fe0b20a7a591c22c21 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 8 Aug 2013 15:56:49 +0900 Subject: [PATCH 17/40] f2fs: fix a build failure due to missing the kobject header This patch should resolve the following error reported by kbuild test robot. All error/warnings: In file included from fs/f2fs/dir.c:13:0: >> fs/f2fs/f2fs.h:435:17: error: field 's_kobj' has incomplete type struct kobject s_kobj; The failure was caused by missing the kobject header file in dir.c. So, this patch move the header file to the right location, f2fs.h. CC: Namjae Jeon Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 13db10b70e66..d8e386ceb0cd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -17,6 +17,7 @@ #include #include #include +#include /* * For mount options diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 94c0e2049546..66d1ec137e44 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include "f2fs.h" From dbe6a5ff4fa78bdfa983458c338831d91b35f315 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 9 Aug 2013 08:14:06 +0900 Subject: [PATCH 18/40] f2fs: fix the use of XATTR_NODE_OFFSET This patch fixes the use of XATTR_NODE_OFFSET. o The offset should not use several MSB bits which are used by marking node blocks. o IS_DNODE should handle XATTR_NODE_OFFSET to avoid potential abnormality during the fsync call. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 +++++++----- fs/f2fs/node.h | 4 ++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d8e386ceb0cd..eb8c45b65fe5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -135,11 +135,13 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) /* * For INODE and NODE manager */ -#define XATTR_NODE_OFFSET (-1) /* - * store xattrs to one node block per - * file keeping -1 as its node offset to - * distinguish from index node blocks. - */ +/* + * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1 + * as its node offset to distinguish from index node blocks. + * But some bits are used to mark the node block. + */ +#define XATTR_NODE_OFFSET ((((unsigned int)-1) << OFFSET_BIT_SHIFT) \ + >> OFFSET_BIT_SHIFT) enum { ALLOC_NODE, /* allocate a new node page if needed */ LOOKUP_NODE, /* look up a node without readahead */ diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 87349c4ea0ef..3496bb3e15dc 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -229,6 +229,10 @@ static inline block_t next_blkaddr_of_node(struct page *node_page) static inline bool IS_DNODE(struct page *node_page) { unsigned int ofs = ofs_of_node(node_page); + + if (ofs == XATTR_NODE_OFFSET) + return false; + if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || ofs == 5 + 2 * NIDS_PER_BLOCK) return false; From e518ff81c337e39b9b63d92ba41e4512521d7c9c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 9 Aug 2013 14:46:15 +0900 Subject: [PATCH 19/40] f2fs: fix inconsistency between xattr node blocks and its inode Previously xattr node blocks are stored to the COLD_NODE log, which means that our roll-forward mechanism doesn't recover the xattr node blocks at all. Only the direct node blocks in the WARM_NODE log can be recovered. So, let's resolve the issue simply by conducting checkpoint during fsync when a file has a modified xattr node block. This approach is able to degrade the performance, but normally the checkpoint overhead is shown at the initial fsync call after the xattr entry changes. Once the checkpoint is done, no additional overhead would be occurred. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 5 +++++ fs/f2fs/xattr.c | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index eb8c45b65fe5..c1c9670e642f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -181,6 +181,7 @@ struct f2fs_inode_info { f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ nid_t i_xattr_nid; /* node id that contains xattrs */ + unsigned long long xattr_ver; /* cp version of xattr modification */ struct extent_info ext; /* in-memory extent cache entry */ }; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c2deb27ffb72..8ef3184b0037 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -161,10 +161,15 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) need_cp = true; else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) need_cp = true; + else if (F2FS_I(inode)->xattr_ver == + le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver)) + need_cp = true; if (need_cp) { nid_t pino; + F2FS_I(inode)->xattr_ver = 0; + /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); if (file_wrong_pino(inode) && inode->i_nlink == 1 && diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 3ab07ecd86ca..0f6d2a1b9a7a 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -486,6 +486,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, inode->i_ctime = CURRENT_TIME; clear_inode_flag(fi, FI_ACL_MODE); } + + /* store checkpoint version for conducting checkpoint during fsync */ + fi->xattr_ver = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver); + if (ipage) update_inode(inode, ipage); else From d71b5564c0da4f652af2e4ca9d3c22b9c960ec1f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 9 Aug 2013 15:03:21 +0900 Subject: [PATCH 20/40] f2fs: introduce cur_cp_version function to reduce code size This patch introduces a new inline function, cur_cp_version, to reduce redundant codes. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 +++--- fs/f2fs/f2fs.h | 7 ++++++- fs/f2fs/file.c | 3 +-- fs/f2fs/recovery.c | 4 ++-- fs/f2fs/xattr.c | 2 +- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c5a5c390a5cc..bb312201ca95 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -379,7 +379,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp1; - pre_version = le64_to_cpu(cp_block->checkpoint_ver); + pre_version = cur_cp_version(cp_block); /* Read the 2nd cp block in this CP pack */ cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; @@ -394,7 +394,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp2; - cur_version = le64_to_cpu(cp_block->checkpoint_ver); + cur_version = cur_cp_version(cp_block); if (cur_version == pre_version) { *version = cur_version; @@ -799,7 +799,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) * Increase the version number so that * SIT entries and seg summaries are written at correct place */ - ckpt_ver = le64_to_cpu(ckpt->checkpoint_ver); + ckpt_ver = cur_cp_version(ckpt); ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); /* write cached NAT/SIT entries to NAT/SIT area */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c1c9670e642f..5348b63adbe9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -503,6 +503,11 @@ static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) sbi->s_dirty = 0; } +static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) +{ + return le64_to_cpu(cp->checkpoint_ver); +} + static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) { unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); @@ -691,7 +696,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) { block_t start_addr; struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver); + unsigned long long ckpt_version = cur_cp_version(ckpt); start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8ef3184b0037..bd4184e35525 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -161,8 +161,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) need_cp = true; else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) need_cp = true; - else if (F2FS_I(inode)->xattr_ver == - le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver)) + else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) need_cp = true; if (need_cp) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 639eb3465286..c6908b5d9d84 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -117,7 +117,7 @@ static int recover_inode(struct inode *inode, struct page *node_page) static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) { - unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver); + unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); struct curseg_info *curseg; struct page *page; block_t blkaddr; @@ -355,7 +355,7 @@ err: static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head, int type) { - unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver); + unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); struct curseg_info *curseg; struct page *page; int err = 0; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 0f6d2a1b9a7a..fb16f71e4c23 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -488,7 +488,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, } /* store checkpoint version for conducting checkpoint during fsync */ - fi->xattr_ver = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver); + fi->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); if (ipage) update_inode(inode, ipage); From 41dfde135f9169948dd0c9bba948774f2e521210 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 9 Aug 2013 18:21:24 +0800 Subject: [PATCH 21/40] f2fs: clean up the needless end 'return' of void function Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 - fs/f2fs/node.c | 1 - fs/f2fs/segment.c | 4 ---- fs/f2fs/super.c | 1 - 4 files changed, 7 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a7eb52925723..ea3cb29018e9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -176,7 +176,6 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) end_update: write_unlock(&fi->ext.ext_lock); sync_inode_page(dn); - return; } struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f5172e271d46..bb8fbda43669 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -916,7 +916,6 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) f2fs_put_page(apage, 0); else if (err == LOCKED_PAGE) f2fs_put_page(apage, 1); - return; } struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 68e344f9e042..9c45b8ee6881 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -117,7 +117,6 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) } mutex_unlock(&dirty_i->seglist_lock); - return; } /* @@ -261,7 +260,6 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, void *addr = curseg->sum_blk; addr += curseg->next_blkoff * sizeof(struct f2fs_summary); memcpy(addr, sum, sizeof(struct f2fs_summary)); - return; } /* @@ -559,7 +557,6 @@ out: #ifdef CONFIG_F2FS_STAT_FS sbi->segment_count[curseg->alloc_type]++; #endif - return; } void allocate_new_segments(struct f2fs_sb_info *sbi) @@ -1192,7 +1189,6 @@ void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); - return; } int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 66d1ec137e44..1d12e60c00d9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -317,7 +317,6 @@ static int f2fs_drop_inode(struct inode *inode) static void f2fs_dirty_inode(struct inode *inode, int flags) { set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); - return; } static void f2fs_i_callback(struct rcu_head *head) From 9c02740c0174932162531a28ba8593e82884a9d7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 12 Aug 2013 16:00:46 +0900 Subject: [PATCH 22/40] f2fs: check the free space first in new_node_page Let's check the free space in prior to the main process of allocating a new node page. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bb8fbda43669..858a333f6ab9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -833,29 +833,29 @@ struct page *new_node_page(struct dnode_of_data *dn, if (!page) return ERR_PTR(-ENOMEM); - get_node_info(sbi, dn->nid, &old_ni); + if (!inc_valid_node_count(sbi, dn->inode, 1)) { + err = -ENOSPC; + goto fail; + } - SetPageUptodate(page); - fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); + get_node_info(sbi, dn->nid, &old_ni); /* Reinitialize old_ni with new node page */ BUG_ON(old_ni.blk_addr != NULL_ADDR); new_ni = old_ni; new_ni.ino = dn->inode->i_ino; - - if (!inc_valid_node_count(sbi, dn->inode, 1)) { - err = -ENOSPC; - goto fail; - } set_node_addr(sbi, &new_ni, NEW_ADDR); + + fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(dn->inode, page); + SetPageUptodate(page); + set_page_dirty(page); dn->node_page = page; if (ipage) update_inode(dn->inode, ipage); else sync_inode_page(dn); - set_page_dirty(page); if (ofs == 0) inc_valid_inode_count(sbi); From 479bd73ac425ff117efeea051077b4277baab52e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 12 Aug 2013 16:04:53 +0900 Subject: [PATCH 23/40] f2fs: should cover i_xattr_nid with its xattr node page lock Previously, f2fs_setxattr assigns i_xattr_nid in the inode page inconsistently. The scenario is: = Thread 1 = = Thread 2 = = fi->i_xattr_nid = = on-disk nid = f2fs_setxattr 0 0 new_node_page X 0 sync_inode_page X X checkpoint X X -. grab_cache_page X X | --> allocate a new xattr node block or -ENOSPC <----------------' At this moment, the checkpoint stores inconsistent data where the inode has i_xattr_nid but actual xattr node block is not allocated yet. So, we should assign the real i_xattr_nid only after its xattr node block is allocated. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 +++ fs/f2fs/xattr.c | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 858a333f6ab9..1c21344fba30 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -851,6 +851,9 @@ struct page *new_node_page(struct dnode_of_data *dn, SetPageUptodate(page); set_page_dirty(page); + if (ofs == XATTR_NODE_OFFSET) + F2FS_I(dn->inode)->i_xattr_nid = dn->nid; + dn->node_page = page; if (ipage) update_inode(dn->inode, ipage); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index fb16f71e4c23..3bc307c22b70 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -378,23 +378,23 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, if (!fi->i_xattr_nid) { /* Allocate new attribute block */ struct dnode_of_data dn; + nid_t new_nid; - if (!alloc_nid(sbi, &fi->i_xattr_nid)) { + if (!alloc_nid(sbi, &new_nid)) { error = -ENOSPC; goto exit; } - set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid); + set_new_dnode(&dn, inode, NULL, NULL, new_nid); mark_inode_dirty(inode); page = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); if (IS_ERR(page)) { - alloc_nid_failed(sbi, fi->i_xattr_nid); - fi->i_xattr_nid = 0; + alloc_nid_failed(sbi, new_nid); error = PTR_ERR(page); goto exit; } - alloc_nid_done(sbi, fi->i_xattr_nid); + alloc_nid_done(sbi, new_nid); base_addr = page_address(page); header = XATTR_HDR(base_addr); header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC); From e27dae4d663762da2020e93885be2219f0608ec6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 15 Aug 2013 08:54:56 +0300 Subject: [PATCH 24/40] f2fs: alloc_page() doesn't return an ERR_PTR alloc_page() returns a NULL on failure, it never returns an ERR_PTR. Signed-off-by: Dan Carpenter Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- fs/f2fs/recovery.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1c21344fba30..818ff368de81 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1517,8 +1517,8 @@ int restore_node_summary(struct f2fs_sb_info *sbi, /* alloc temporal page for read node */ page = alloc_page(GFP_NOFS | __GFP_ZERO); - if (IS_ERR(page)) - return PTR_ERR(page); + if (!page) + return -ENOMEM; lock_page(page); /* scan the node segment */ diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c6908b5d9d84..fa493bb64167 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -129,8 +129,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) /* read node page */ page = alloc_page(GFP_F2FS_ZERO); - if (IS_ERR(page)) - return PTR_ERR(page); + if (!page) + return -ENOMEM; lock_page(page); while (1) { @@ -367,7 +367,7 @@ static int recover_data(struct f2fs_sb_info *sbi, /* read node page */ page = alloc_page(GFP_NOFS | __GFP_ZERO); - if (IS_ERR(page)) + if (!page) return -ENOMEM; lock_page(page); From 92c4342fb72a6baf9ee9fcd079b46ed0286ebe33 Mon Sep 17 00:00:00 2001 From: Jin Xu Date: Thu, 15 Aug 2013 19:17:01 +0800 Subject: [PATCH 25/40] f2fs: avoid writing inode redundantly when creating a file In f2fs_write_inode, updating inode after f2fs_balance_fs is not a optimized way in the case that f2fs_gc is performed ahead. The inode page will be unnecessarily written out twice, one of which is in f2fs_gc->...->sync_node_pages and the other is in update_inode_page. Let's update the inode page in prior to f2fs_balance_fs to avoid this. To reproduce it, $ touch file (before this step, should make the device need f2fs_gc) $ sync (or wait the bdi to write dirty inode) Signed-off-by: Jin Xu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9ab81e7472c5..7f8569bd8759 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -221,9 +221,6 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) return 0; - if (wbc) - f2fs_balance_fs(sbi); - /* * We need to lock here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. @@ -231,6 +228,10 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) ilock = mutex_lock_op(sbi); ret = update_inode_page(inode); mutex_unlock_op(sbi, ilock); + + if (wbc) + f2fs_balance_fs(sbi); + return ret; } From 7b40527508670e56d817b837b2114bc340446539 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 19 Aug 2013 09:41:15 +0800 Subject: [PATCH 26/40] f2fs: fix a compound statement label error An error "label at end of compound statement" will occur if CONFIG_F2FS_STAT_FS disabled. fs/f2fs/segment.c:556:1: error: label at end of compound statement So clean up the 'out' label to fix it. Reported-by: Fengguang Wu Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9c45b8ee6881..09af9c7b0f52 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -540,12 +540,9 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, { struct curseg_info *curseg = CURSEG_I(sbi, type); - if (force) { + if (force) new_curseg(sbi, type, true); - goto out; - } - - if (type == CURSEG_WARM_NODE) + else if (type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) new_curseg(sbi, type, false); @@ -553,7 +550,6 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, change_curseg(sbi, type, true); else new_curseg(sbi, type, false); -out: #ifdef CONFIG_F2FS_STAT_FS sbi->segment_count[curseg->alloc_type]++; #endif From 9890ff3f23ed78c63611f661006cd5ae38550f44 Mon Sep 17 00:00:00 2001 From: Zhao Hongjiang Date: Tue, 20 Aug 2013 16:49:51 +0800 Subject: [PATCH 27/40] f2fs: fix memory leak when init f2fs filesystem fail When any of the caches create fails in init_f2fs_fs(), the other caches which are create successful should be free. Signed-off-by: Zhao Hongjiang Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1d12e60c00d9..9e51e4fc00a0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1004,21 +1004,33 @@ static int __init init_f2fs_fs(void) goto fail; err = create_node_manager_caches(); if (err) - goto fail; + goto free_inodecache; err = create_gc_caches(); if (err) - goto fail; + goto free_node_manager_caches; err = create_checkpoint_caches(); if (err) - goto fail; + goto free_gc_caches; f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); if (!f2fs_kset) - goto fail; + goto free_checkpoint_caches; err = register_filesystem(&f2fs_fs_type); if (err) - goto fail; + goto free_kset; f2fs_create_root_stats(); f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + return 0; + +free_kset: + kset_unregister(f2fs_kset); +free_checkpoint_caches: + destroy_checkpoint_caches(); +free_gc_caches: + destroy_gc_caches(); +free_node_manager_caches: + destroy_node_manager_caches(); +free_inodecache: + destroy_inodecache(); fail: return err; } From d59ff4df7b7ae39e6fb047db9e83cd899b5764f1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 20 Aug 2013 19:13:07 +0900 Subject: [PATCH 28/40] f2fs: fix wrong BUG_ON condition This patch removes a false-alaramed BUG_ON. The previous BUG_ON condition didn't cover the following true scenario. In f2fs_add_link, 1) get_new_data_page gives an uptodate page successfully, and then, 2) init_inode_metadata returns -ENOSPC. At this moment, a new clean data page is remained in the page cache, but its block address still indicates NEW_ADDR. After then, even if sync is called, this clean data page cannot be written to the disk due to the clean state. So this means that get_lock_data_page should make a new empty page when its block address is NEW_ADDR and its page is not uptodated. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ea3cb29018e9..6b328de41728 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -259,8 +259,17 @@ repeat: if (PageUptodate(page)) return page; - BUG_ON(dn.data_blkaddr == NEW_ADDR); - BUG_ON(dn.data_blkaddr == NULL_ADDR); + /* + * A new dentry page is allocated but not able to be written, since its + * new inode page couldn't be allocated due to -ENOSPC. + * In such the case, its blkaddr can be remained as NEW_ADDR. + * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata. + */ + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + return page; + } err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); if (err) From 6e6b978c32bacd98a93e34af7f4222e76007705f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 23 Aug 2013 08:30:20 +0800 Subject: [PATCH 29/40] f2fs: fix error return code in init_f2fs_fs() Fix to return -ENOMEM in the kset create and add error handling case instead of 0, as done elsewhere in this function. Introduced by commit b59d0bae6ca30c496f298881616258f9cde0d9c6. (f2fs: add sysfs support for controlling the gc_thread) Signed-off-by: Wei Yongjun Acked-by: Namjae Jeon [Jaegeuk Kim: merge the patch with previous modification] Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9e51e4fc00a0..d28c4528eff8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1012,8 +1012,10 @@ static int __init init_f2fs_fs(void) if (err) goto free_gc_caches; f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); - if (!f2fs_kset) + if (!f2fs_kset) { + err = -ENOMEM; goto free_checkpoint_caches; + } err = register_filesystem(&f2fs_fs_type); if (err) goto free_kset; From 444c580f7e9ad29927a5d5269d576bd7cdccebb8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 8 Aug 2013 15:16:22 +0900 Subject: [PATCH 30/40] f2fs: add flags for inline xattrs This patch adds basic inode flags for inline xattrs, F2FS_INLINE_XATTR, and add a mount option, inline_xattr, which is enabled when xattr is set. If the mount option is enabled, all the files are marked with the inline_xattrs flag. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 18 ++++++++++++++++++ fs/f2fs/inode.c | 2 ++ fs/f2fs/super.c | 14 ++++++++++++++ include/linux/f2fs_fs.h | 4 +++- 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5348b63adbe9..b82f14199921 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -29,6 +29,7 @@ #define F2FS_MOUNT_XATTR_USER 0x00000010 #define F2FS_MOUNT_POSIX_ACL 0x00000020 #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 +#define F2FS_MOUNT_INLINE_XATTR 0x00000080 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -892,6 +893,7 @@ enum { FI_NO_ALLOC, /* should not allocate any blocks */ FI_UPDATE_DIR, /* should update inode block for consistency */ FI_DELAY_IPUT, /* used for the recovery */ + FI_INLINE_XATTR, /* used for inline xattr */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -924,6 +926,22 @@ static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) return 0; } +static inline void get_inline_info(struct f2fs_inode_info *fi, + struct f2fs_inode *ri) +{ + if (ri->i_inline & F2FS_INLINE_XATTR) + set_inode_flag(fi, FI_INLINE_XATTR); +} + +static inline void set_raw_inline(struct f2fs_inode_info *fi, + struct f2fs_inode *ri) +{ + ri->i_inline = 0; + + if (is_inode_flag_set(fi, FI_INLINE_XATTR)) + ri->i_inline |= F2FS_INLINE_XATTR; +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 7f8569bd8759..9339cd292047 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -85,6 +85,7 @@ static int do_read_inode(struct inode *inode) fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); get_extent_info(&fi->ext, ri->i_ext); + get_inline_info(fi, ri); f2fs_put_page(node_page, 1); return 0; } @@ -164,6 +165,7 @@ void update_inode(struct inode *inode, struct page *node_page) ri->i_size = cpu_to_le64(i_size_read(inode)); ri->i_blocks = cpu_to_le64(inode->i_blocks); set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); + set_raw_inline(F2FS_I(inode), ri); ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d28c4528eff8..70ecf484e7e5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -47,6 +47,7 @@ enum { Opt_noacl, Opt_active_logs, Opt_disable_ext_identify, + Opt_inline_xattr, Opt_err, }; @@ -59,6 +60,7 @@ static match_table_t f2fs_tokens = { {Opt_noacl, "noacl"}, {Opt_active_logs, "active_logs=%u"}, {Opt_disable_ext_identify, "disable_ext_identify"}, + {Opt_inline_xattr, "inline_xattr"}, {Opt_err, NULL}, }; @@ -238,11 +240,18 @@ static int parse_options(struct super_block *sb, char *options) case Opt_nouser_xattr: clear_opt(sbi, XATTR_USER); break; + case Opt_inline_xattr: + set_opt(sbi, INLINE_XATTR); + break; #else case Opt_nouser_xattr: f2fs_msg(sb, KERN_INFO, "nouser_xattr options not supported"); break; + case Opt_inline_xattr: + f2fs_msg(sb, KERN_INFO, + "inline_xattr options not supported"); + break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL case Opt_noacl: @@ -292,6 +301,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) set_inode_flag(fi, FI_NEW_INODE); + if (test_opt(F2FS_SB(sb), INLINE_XATTR)) + set_inode_flag(fi, FI_INLINE_XATTR); + return &fi->vfs_inode; } @@ -444,6 +456,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",user_xattr"); else seq_puts(seq, ",nouser_xattr"); + if (test_opt(sbi, INLINE_XATTR)) + seq_puts(seq, ",inline_xattr"); #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL if (test_opt(sbi, POSIX_ACL)) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 383d5e39b280..10ab11f8f99d 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -144,10 +144,12 @@ struct f2fs_extent { #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ #define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ +#define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ + struct f2fs_inode { __le16 i_mode; /* file mode */ __u8 i_advise; /* file hints */ - __u8 i_reserved; /* reserved */ + __u8 i_inline; /* file inline flags */ __le32 i_uid; /* user ID */ __le32 i_gid; /* group ID */ __le32 i_links; /* links count */ From de93653fe31fc9439971296842dcd0280f8ab5f4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 12 Aug 2013 21:08:03 +0900 Subject: [PATCH 31/40] f2fs: reserve the xattr space dynamically This patch enables the number of direct pointers inside on-disk inode block to be changed dynamically according to the size of inline xattr space. The number of direct pointers, ADDRS_PER_INODE, can be changed only if the file has inline xattr flag. The number of direct pointers that will be used by inline xattrs is defined as F2FS_INLINE_XATTR_ADDRS. Current patch assigns F2FS_INLINE_XATTR_ADDRS to 0 temporarily. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++-- fs/f2fs/f2fs.h | 18 ++++++++---------- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 9 ++++++--- fs/f2fs/node.c | 9 +++++---- fs/f2fs/recovery.c | 13 +++++++++---- fs/f2fs/super.c | 2 +- include/linux/f2fs_fs.h | 16 ++++++++++++---- 8 files changed, 45 insertions(+), 29 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6b328de41728..941f9b9ca3a5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -117,7 +117,8 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) block_t start_blkaddr, end_blkaddr; BUG_ON(blk_addr == NEW_ADDR); - fofs = start_bidx_of_node(ofs_of_node(dn->node_page)) + dn->ofs_in_node; + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; /* Update the page address in the parent node */ __set_data_blkaddr(dn, blk_addr); @@ -448,7 +449,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock, unsigned int end_offset; end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE : + ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; clear_buffer_new(bh_result); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b82f14199921..0343759ceb4c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -300,15 +300,6 @@ struct f2fs_sm_info { unsigned int ovp_segments; /* # of overprovision segments */ }; -/* - * For directory operation - */ -#define NODE_DIR1_BLOCK (ADDRS_PER_INODE + 1) -#define NODE_DIR2_BLOCK (ADDRS_PER_INODE + 2) -#define NODE_IND1_BLOCK (ADDRS_PER_INODE + 3) -#define NODE_IND2_BLOCK (ADDRS_PER_INODE + 4) -#define NODE_DIND_BLOCK (ADDRS_PER_INODE + 5) - /* * For superblock */ @@ -942,6 +933,13 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_XATTR; } +static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) +{ + if (is_inode_flag_set(fi, FI_INLINE_XATTR)) + return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; + return DEF_ADDRS_PER_INODE; +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; @@ -1108,7 +1106,7 @@ int do_write_data_page(struct page *); */ int start_gc_thread(struct f2fs_sb_info *); void stop_gc_thread(struct f2fs_sb_info *); -block_t start_bidx_of_node(unsigned int); +block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); int f2fs_gc(struct f2fs_sb_info *); void build_gc_manager(struct f2fs_sb_info *); int __init create_gc_caches(void); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bd4184e35525..02c906971cc6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -290,7 +290,7 @@ static int truncate_blocks(struct inode *inode, u64 from) } if (IS_INODE(dn.node_page)) - count = ADDRS_PER_INODE; + count = ADDRS_PER_INODE(F2FS_I(inode)); else count = ADDRS_PER_BLOCK; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e6b3ffd5ff6a..eb89037e3312 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -461,7 +461,7 @@ next_step: * as indirect or double indirect node blocks, are given, it must be a caller's * bug. */ -block_t start_bidx_of_node(unsigned int node_ofs) +block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi) { unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4; unsigned int bidx; @@ -478,7 +478,7 @@ block_t start_bidx_of_node(unsigned int node_ofs) int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); bidx = node_ofs - 5 - dec; } - return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE; + return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); } static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, @@ -586,7 +586,6 @@ next_step: continue; } - start_bidx = start_bidx_of_node(nofs); ofs_in_node = le16_to_cpu(entry->ofs_in_node); if (phase == 2) { @@ -594,6 +593,8 @@ next_step: if (IS_ERR(inode)) continue; + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + data_page = find_data_page(inode, start_bidx + ofs_in_node, false); if (IS_ERR(data_page)) @@ -604,6 +605,8 @@ next_step: } else { inode = find_gc_inode(dni.ino, ilist); if (inode) { + start_bidx = start_bidx_of_node(nofs, + F2FS_I(inode)); data_page = get_lock_data_page(inode, start_bidx + ofs_in_node); if (IS_ERR(data_page)) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 818ff368de81..f0e733b21b2a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -315,9 +315,10 @@ cache: * The maximum depth is four. * Offset[0] will have raw inode offset. */ -static int get_node_path(long block, int offset[4], unsigned int noffset[4]) +static int get_node_path(struct f2fs_inode_info *fi, long block, + int offset[4], unsigned int noffset[4]) { - const long direct_index = ADDRS_PER_INODE; + const long direct_index = ADDRS_PER_INODE(fi); const long direct_blks = ADDRS_PER_BLOCK; const long dptrs_per_blk = NIDS_PER_BLOCK; const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; @@ -405,7 +406,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) int level, i; int err = 0; - level = get_node_path(index, offset, noffset); + level = get_node_path(F2FS_I(dn->inode), index, offset, noffset); nids[0] = dn->inode->i_ino; npage[0] = dn->inode_page; @@ -687,7 +688,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) trace_f2fs_truncate_inode_blocks_enter(inode, from); - level = get_node_path(from, offset, noffset); + level = get_node_path(F2FS_I(inode), from, offset, noffset); restart: page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index fa493bb64167..51ef5eec33d7 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -213,6 +213,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, void *kaddr; struct inode *inode; struct page *node_page; + unsigned int offset; block_t bidx; int i; @@ -257,8 +258,8 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, node_page = get_node_page(sbi, nid); if (IS_ERR(node_page)) return PTR_ERR(node_page); - bidx = start_bidx_of_node(ofs_of_node(node_page)) + - le16_to_cpu(sum.ofs_in_node); + + offset = ofs_of_node(node_page); ino = ino_of_node(node_page); f2fs_put_page(node_page, 1); @@ -267,6 +268,9 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, if (IS_ERR(inode)) return PTR_ERR(inode); + bidx = start_bidx_of_node(offset, F2FS_I(inode)) + + le16_to_cpu(sum.ofs_in_node); + truncate_hole(inode, bidx, bidx + 1); iput(inode); return 0; @@ -275,6 +279,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, block_t blkaddr) { + struct f2fs_inode_info *fi = F2FS_I(inode); unsigned int start, end; struct dnode_of_data dn; struct f2fs_summary sum; @@ -282,9 +287,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, int err = 0, recovered = 0; int ilock; - start = start_bidx_of_node(ofs_of_node(page)); + start = start_bidx_of_node(ofs_of_node(page), fi); if (IS_INODE(page)) - end = start + ADDRS_PER_INODE; + end = start + ADDRS_PER_INODE(fi); else end = start + ADDRS_PER_BLOCK; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 70ecf484e7e5..13d0a0fe49dd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -618,7 +618,7 @@ static const struct export_operations f2fs_export_ops = { static loff_t max_file_size(unsigned bits) { - loff_t result = ADDRS_PER_INODE; + loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); loff_t leaf_count = ADDRS_PER_BLOCK; /* two direct node blocks */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 10ab11f8f99d..93e7020fb7a8 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -140,9 +140,17 @@ struct f2fs_extent { } __packed; #define F2FS_NAME_LEN 255 -#define ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ -#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ -#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ +#define F2FS_INLINE_XATTR_ADDRS 0 /* 0 bytes for inline xattrs */ +#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ +#define ADDRS_PER_INODE(fi) addrs_per_inode(fi) +#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ + +#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) +#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) +#define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3) +#define NODE_IND2_BLOCK (DEF_ADDRS_PER_INODE + 4) +#define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5) #define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ @@ -172,7 +180,7 @@ struct f2fs_inode { struct f2fs_extent i_ext; /* caching a largest extent */ - __le32 i_addr[ADDRS_PER_INODE]; /* Pointers to data blocks */ + __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ __le32 i_nid[5]; /* direct(2), indirect(2), double_indirect(1) node id */ From dd9cfe236f95bbda9ceb5a4ca419b9fb574c95f9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 13 Aug 2013 10:13:55 +0900 Subject: [PATCH 32/40] f2fs: introduce __find_xattr for readability The __find_xattr is to search the wanted xattr entry starting from the base_addr. If not found, the returned entry is the last empty xattr entry that can be allocated newly. Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 46 +++++++++++++++++++++------------------------- fs/f2fs/xattr.h | 3 +-- 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 3bc307c22b70..45a8ef882f89 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -246,6 +246,22 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) return handler; } +static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, + size_t name_len, const char *name) +{ + struct f2fs_xattr_entry *entry; + + list_for_each_xattr(entry, base_addr) { + if (entry->e_name_index != name_index) + continue; + if (entry->e_name_len != name_len) + continue; + if (!memcmp(entry->e_name, name, name_len)) + break; + } + return entry; +} + int f2fs_getxattr(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size) { @@ -253,8 +269,7 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name, struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *entry; struct page *page; - void *base_addr; - int error = 0, found = 0; + int error = 0; size_t value_len, name_len; if (name == NULL) @@ -267,19 +282,9 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name, page = get_node_page(sbi, fi->i_xattr_nid); if (IS_ERR(page)) return PTR_ERR(page); - base_addr = page_address(page); - list_for_each_xattr(entry, base_addr) { - if (entry->e_name_index != name_index) - continue; - if (entry->e_name_len != name_len) - continue; - if (!memcmp(entry->e_name, name, name_len)) { - found = 1; - break; - } - } - if (!found) { + entry = __find_xattr(page_address(page), name_index, name_len, name); + if (IS_XATTR_LAST_ENTRY(entry)) { error = -ENODATA; goto cleanup; } @@ -417,18 +422,9 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, } /* find entry with wanted name. */ - found = 0; - list_for_each_xattr(here, base_addr) { - if (here->e_name_index != name_index) - continue; - if (here->e_name_len != name_len) - continue; - if (!memcmp(here->e_name, name, name_len)) { - found = 1; - break; - } - } + here = __find_xattr(base_addr, name_index, name_len, name); + found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; last = here; while (!IS_XATTR_LAST_ENTRY(last)) diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 3c0817bef25d..c3ec042707d4 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -51,7 +51,7 @@ struct f2fs_xattr_entry { #define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr)) #define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr)) -#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr)+1)) +#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1)) #define XATTR_ROUND (3) #define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND) @@ -69,7 +69,6 @@ struct f2fs_xattr_entry { !IS_XATTR_LAST_ENTRY(entry);\ entry = XATTR_NEXT_ENTRY(entry)) - #define MIN_OFFSET XATTR_ALIGN(PAGE_SIZE - \ sizeof(struct node_footer) - \ sizeof(__u32)) From 4f16fb0f9be3f5f9d1254ff6d7bf54b23fb65f4a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Aug 2013 20:40:06 +0900 Subject: [PATCH 33/40] f2fs: add the truncate_xattr_node function The truncate_xattr_node function will be used by inline xattr. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 39 ++++++++++++++++++++++++++++----------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0343759ceb4c..0fe9302e3e17 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1016,6 +1016,7 @@ int is_checkpointed_node(struct f2fs_sb_info *, nid_t); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); +int truncate_xattr_node(struct inode *, struct page *); int remove_inode_page(struct inode *); struct page *new_inode_page(struct inode *, const struct qstr *); struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f0e733b21b2a..2a464a76602d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -772,6 +772,29 @@ fail: return err > 0 ? 0 : err; } +int truncate_xattr_node(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + nid_t nid = F2FS_I(inode)->i_xattr_nid; + struct dnode_of_data dn; + struct page *npage; + + if (!nid) + return 0; + + npage = get_node_page(sbi, nid); + if (IS_ERR(npage)) + return PTR_ERR(npage); + + F2FS_I(inode)->i_xattr_nid = 0; + set_new_dnode(&dn, inode, page, npage, nid); + + if (page) + dn.inode_page_locked = 1; + truncate_node(&dn); + return 0; +} + /* * Caller should grab and release a mutex by calling mutex_lock_op() and * mutex_unlock_op(). @@ -782,22 +805,16 @@ int remove_inode_page(struct inode *inode) struct page *page; nid_t ino = inode->i_ino; struct dnode_of_data dn; + int err; page = get_node_page(sbi, ino); if (IS_ERR(page)) return PTR_ERR(page); - if (F2FS_I(inode)->i_xattr_nid) { - nid_t nid = F2FS_I(inode)->i_xattr_nid; - struct page *npage = get_node_page(sbi, nid); - - if (IS_ERR(npage)) - return PTR_ERR(npage); - - F2FS_I(inode)->i_xattr_nid = 0; - set_new_dnode(&dn, inode, page, npage, nid); - dn.inode_page_locked = 1; - truncate_node(&dn); + err = truncate_xattr_node(inode, page); + if (err) { + f2fs_put_page(page, 1); + return err; } /* 0 is possible, after f2fs_new_inode() is failed */ From 65985d935ddd5657c66a8bb3ae9752ed842549b8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 14 Aug 2013 21:57:27 +0900 Subject: [PATCH 34/40] f2fs: support the inline xattrs 0. modified inode structure -------------------------------------- metadata (e.g., i_mtime, i_ctime, etc) -------------------------------------- direct pointers [0 ~ 873] inline xattrs (200 bytes by default) indirect pointers [0 ~ 4] -------------------------------------- node footer -------------------------------------- 1. setxattr flow - read_all_xattrs copies all the xattrs from inline and xattr node block. - handle xattr entries - write_all_xattrs copies modified xattrs into inline and xattr node block. 2. getxattr flow - read_all_xattrs copies all the xattrs from inline and xattr node block. - check target entries 3. Usage # mount -t f2fs -o inline_xattr $DEV $MNT Once mounted with the inline_xattr option, f2fs marks all the newly created files to reserve an amount of inline xattr space explicitly inside the inode block. Without the mount option, f2fs will not touch any existing files and newly created files as well. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 16 +++ fs/f2fs/node.c | 7 ++ fs/f2fs/xattr.c | 249 ++++++++++++++++++++++++++-------------- fs/f2fs/xattr.h | 12 +- include/linux/f2fs_fs.h | 2 +- 5 files changed, 196 insertions(+), 90 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0fe9302e3e17..608f0df5b919 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -940,6 +940,22 @@ static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) return DEF_ADDRS_PER_INODE; } +static inline void *inline_xattr_addr(struct page *page) +{ + struct f2fs_inode *ri; + ri = (struct f2fs_inode *)page_address(page); + return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - + F2FS_INLINE_XATTR_ADDRS]); +} + +static inline int inline_xattr_size(struct inode *inode) +{ + if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) + return F2FS_INLINE_XATTR_ADDRS << 2; + else + return 0; +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2a464a76602d..c3c03c975bd6 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -787,6 +787,10 @@ int truncate_xattr_node(struct inode *inode, struct page *page) return PTR_ERR(npage); F2FS_I(inode)->i_xattr_nid = 0; + + /* need to do checkpoint during fsync */ + F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); + set_new_dnode(&dn, inode, page, npage, nid); if (page) @@ -1464,6 +1468,9 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; + if (!nid) + return; + spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); BUG_ON(!i || i->state != NID_ALLOC); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 45a8ef882f89..1ac8a5f6e380 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -262,13 +262,141 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, return entry; } +static void *read_all_xattrs(struct inode *inode, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_xattr_header *header; + size_t size = PAGE_SIZE, inline_size = 0; + void *txattr_addr; + + inline_size = inline_xattr_size(inode); + + txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); + if (!txattr_addr) + return NULL; + + /* read from inline xattr */ + if (inline_size) { + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(ipage); + } else { + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) + goto fail; + inline_addr = inline_xattr_addr(page); + } + memcpy(txattr_addr, inline_addr, inline_size); + f2fs_put_page(page, 1); + } + + /* read from xattr node block */ + if (F2FS_I(inode)->i_xattr_nid) { + struct page *xpage; + void *xattr_addr; + + /* The inode already has an extended attribute block. */ + xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + if (IS_ERR(xpage)) + goto fail; + + xattr_addr = page_address(xpage); + memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE); + f2fs_put_page(xpage, 1); + } + + header = XATTR_HDR(txattr_addr); + + /* never been allocated xattrs */ + if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) { + header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC); + header->h_refcount = cpu_to_le32(1); + } + return txattr_addr; +fail: + kzfree(txattr_addr); + return NULL; +} + +static inline int write_all_xattrs(struct inode *inode, __u32 hsize, + void *txattr_addr, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + size_t inline_size = 0; + void *xattr_addr; + struct page *xpage; + nid_t new_nid = 0; + int err; + + inline_size = inline_xattr_size(inode); + + if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) + if (!alloc_nid(sbi, &new_nid)) + return -ENOSPC; + + /* write to inline xattr */ + if (inline_size) { + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(ipage); + } else { + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(page); + } + inline_addr = inline_xattr_addr(page); + } + memcpy(inline_addr, txattr_addr, inline_size); + f2fs_put_page(page, 1); + + /* no need to use xattr node block */ + if (hsize <= inline_size) { + err = truncate_xattr_node(inode, ipage); + alloc_nid_failed(sbi, new_nid); + return err; + } + } + + /* write to xattr node block */ + if (F2FS_I(inode)->i_xattr_nid) { + xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + if (IS_ERR(xpage)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(xpage); + } + BUG_ON(new_nid); + } else { + struct dnode_of_data dn; + set_new_dnode(&dn, inode, NULL, NULL, new_nid); + xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); + if (IS_ERR(xpage)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(xpage); + } + alloc_nid_done(sbi, new_nid); + } + + xattr_addr = page_address(xpage); + memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE - + sizeof(struct node_footer)); + set_page_dirty(xpage); + f2fs_put_page(xpage, 1); + + /* need to checkpoint during fsync */ + F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); + return 0; +} + int f2fs_getxattr(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *entry; - struct page *page; + void *base_addr; int error = 0; size_t value_len, name_len; @@ -276,14 +404,11 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name, return -EINVAL; name_len = strlen(name); - if (!fi->i_xattr_nid) - return -ENODATA; + base_addr = read_all_xattrs(inode, NULL); + if (!base_addr) + return -ENOMEM; - page = get_node_page(sbi, fi->i_xattr_nid); - if (IS_ERR(page)) - return PTR_ERR(page); - - entry = __find_xattr(page_address(page), name_index, name_len, name); + entry = __find_xattr(base_addr, name_index, name_len, name); if (IS_XATTR_LAST_ENTRY(entry)) { error = -ENODATA; goto cleanup; @@ -303,28 +428,21 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name, error = value_len; cleanup: - f2fs_put_page(page, 1); + kzfree(base_addr); return error; } ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { struct inode *inode = dentry->d_inode; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *entry; - struct page *page; void *base_addr; int error = 0; size_t rest = buffer_size; - if (!fi->i_xattr_nid) - return 0; - - page = get_node_page(sbi, fi->i_xattr_nid); - if (IS_ERR(page)) - return PTR_ERR(page); - base_addr = page_address(page); + base_addr = read_all_xattrs(inode, NULL); + if (!base_addr) + return -ENOMEM; list_for_each_xattr(entry, base_addr) { const struct xattr_handler *handler = @@ -347,7 +465,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) } error = buffer_size - rest; cleanup: - f2fs_put_page(page, 1); + kzfree(base_addr); return error; } @@ -356,14 +474,13 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_xattr_header *header = NULL; struct f2fs_xattr_entry *here, *last; - struct page *page; void *base_addr; - int error, found, free, newsize; + int found, newsize; size_t name_len; - char *pval; int ilock; + __u32 new_hsize; + int error = -ENOMEM; if (name == NULL) return -EINVAL; @@ -373,53 +490,16 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, name_len = strlen(name); - if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN) + if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) return -ERANGE; f2fs_balance_fs(sbi); ilock = mutex_lock_op(sbi); - if (!fi->i_xattr_nid) { - /* Allocate new attribute block */ - struct dnode_of_data dn; - nid_t new_nid; - - if (!alloc_nid(sbi, &new_nid)) { - error = -ENOSPC; - goto exit; - } - set_new_dnode(&dn, inode, NULL, NULL, new_nid); - mark_inode_dirty(inode); - - page = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); - if (IS_ERR(page)) { - alloc_nid_failed(sbi, new_nid); - error = PTR_ERR(page); - goto exit; - } - - alloc_nid_done(sbi, new_nid); - base_addr = page_address(page); - header = XATTR_HDR(base_addr); - header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC); - header->h_refcount = cpu_to_le32(1); - } else { - /* The inode already has an extended attribute block. */ - page = get_node_page(sbi, fi->i_xattr_nid); - if (IS_ERR(page)) { - error = PTR_ERR(page); - goto exit; - } - - base_addr = page_address(page); - header = XATTR_HDR(base_addr); - } - - if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) { - error = -EIO; - goto cleanup; - } + base_addr = read_all_xattrs(inode, ipage); + if (!base_addr) + goto exit; /* find entry with wanted name. */ here = __find_xattr(base_addr, name_index, name_len, name); @@ -435,22 +515,25 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, /* 1. Check space */ if (value) { - /* If value is NULL, it is remove operation. + int free; + /* + * If value is NULL, it is remove operation. * In case of update operation, we caculate free. */ - free = MIN_OFFSET - ((char *)last - (char *)header); + free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); if (found) free = free - ENTRY_SIZE(here); if (free < newsize) { error = -ENOSPC; - goto cleanup; + goto exit; } } /* 2. Remove old entry */ if (found) { - /* If entry is found, remove old entry. + /* + * If entry is found, remove old entry. * If not found, remove operation is not needed. */ struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here); @@ -461,10 +544,15 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, memset(last, 0, oldsize); } + new_hsize = (char *)last - (char *)base_addr; + /* 3. Write new entry */ if (value) { - /* Before we come here, old entry is removed. - * We just write new entry. */ + char *pval; + /* + * Before we come here, old entry is removed. + * We just write new entry. + */ memset(last, 0, newsize); last->e_name_index = name_index; last->e_name_len = name_len; @@ -472,10 +560,12 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, pval = last->e_name + name_len; memcpy(pval, value, value_len); last->e_value_size = cpu_to_le16(value_len); + new_hsize += newsize; } - set_page_dirty(page); - f2fs_put_page(page, 1); + error = write_all_xattrs(inode, new_hsize, base_addr, ipage); + if (error) + goto exit; if (is_inode_flag_set(fi, FI_ACL_MODE)) { inode->i_mode = fi->i_acl_mode; @@ -483,19 +573,12 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, clear_inode_flag(fi, FI_ACL_MODE); } - /* store checkpoint version for conducting checkpoint during fsync */ - fi->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); - if (ipage) update_inode(inode, ipage); else update_inode_page(inode); - mutex_unlock_op(sbi, ilock); - - return 0; -cleanup: - f2fs_put_page(page, 1); exit: mutex_unlock_op(sbi, ilock); + kzfree(base_addr); return error; } diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index c3ec042707d4..02a08fb88a15 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -69,16 +69,16 @@ struct f2fs_xattr_entry { !IS_XATTR_LAST_ENTRY(entry);\ entry = XATTR_NEXT_ENTRY(entry)) -#define MIN_OFFSET XATTR_ALIGN(PAGE_SIZE - \ - sizeof(struct node_footer) - \ - sizeof(__u32)) +#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \ + sizeof(struct node_footer) - sizeof(__u32)) -#define MAX_VALUE_LEN (MIN_OFFSET - sizeof(struct f2fs_xattr_header) - \ - sizeof(struct f2fs_xattr_entry)) +#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \ + sizeof(struct f2fs_xattr_header) - \ + sizeof(struct f2fs_xattr_entry)) /* * On-disk structure of f2fs_xattr - * We use only 1 block for xattr. + * We use inline xattrs space + 1 block for xattr. * * +--------------------+ * | f2fs_xattr_header | diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 93e7020fb7a8..bb942f6d5702 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -140,7 +140,7 @@ struct f2fs_extent { } __packed; #define F2FS_NAME_LEN 255 -#define F2FS_INLINE_XATTR_ADDRS 0 /* 0 bytes for inline xattrs */ +#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ #define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ #define ADDRS_PER_INODE(fi) addrs_per_inode(fi) #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ From 8cb82688097b300dd3f107c3e19c92662fba5359 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 26 Aug 2013 21:25:31 +0900 Subject: [PATCH 35/40] f2fs: fix omitting to update inode page The f2fs_set_link updates its parent inode number, so we should sync this to the inode block. Otherwise, the data can be lost after sudden-power-off. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 4e475181280c..7e43f6829afd 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -447,6 +447,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, else release_orphan_inode(sbi); + update_inode_page(old_inode); update_inode_page(new_inode); } else { err = f2fs_add_link(new_dentry, old_inode); From 749ebfd174bd6fca3083da5f1e7933cbd60316c9 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Thu, 22 Aug 2013 18:11:25 +0800 Subject: [PATCH 36/40] f2fs: use strncasecmp() simplify the string comparison Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7e43f6829afd..2a5359c990fc 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -83,21 +83,11 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) { size_t slen = strlen(s); size_t sublen = strlen(sub); - int ret; if (sublen > slen) return 0; - ret = memcmp(s + slen - sublen, sub, sublen); - if (ret) { /* compare upper case */ - int i; - char upper_sub[8]; - for (i = 0; i < sublen && i < sizeof(upper_sub); i++) - upper_sub[i] = toupper(sub[i]); - return !memcmp(s + slen - sublen, upper_sub, sublen); - } - - return !ret; + return !strncasecmp(s + slen - sublen, sub, sublen); } /* From c34e333fd5ca41317c3dab69ed8a212acacd4aeb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 3 Sep 2013 09:46:45 +0900 Subject: [PATCH 37/40] f2fs: trigger GC when there are prefree segments Previously, f2fs conducts SSR when free_sections() < overprovision_sections. But, even though there are a lot of prefree segments, it can consider SSR only. So, let's consider the number of prefree segments too for triggering SSR. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 062424a0e4c3..cd9bd93212ad 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -453,7 +453,8 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) static inline bool need_SSR(struct f2fs_sb_info *sbi) { - return (free_sections(sbi) < overprovision_sections(sbi)); + return ((prefree_segments(sbi) / sbi->segs_per_sec) + + free_sections(sbi) < overprovision_sections(sbi)); } static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) From 222cbdc4835f8151b886b049d6ad56b18f88d470 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 3 Sep 2013 13:41:37 +0900 Subject: [PATCH 38/40] f2fs: avoid an overflow during utilization calculation The current f2fs uses all the block counts with 32 bit numbers, which is able to cover about 15TB volume. But in calculation of utilization, f2fs multiplies the count by 100 which can induce overflow. This patch fixes this. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index cd9bd93212ad..e0d6d3abf396 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -471,7 +471,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) static inline int utilization(struct f2fs_sb_info *sbi) { - return div_u64(valid_user_blocks(sbi) * 100, sbi->user_block_count); + return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); } /* From 423e95ccbe2e2612ed9fe41667acfc338f3af07b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 5 Sep 2013 10:07:15 +0900 Subject: [PATCH 39/40] f2fs: merge more bios of node block writes Previously, we experience bio traces as follows when running simple sequential write test. f2fs_do_submit_bio: type = NODE, io = no sync, sector = 500104928, size = 4K f2fs_do_submit_bio: type = NODE, io = no sync, sector = 499922208, size = 368K f2fs_do_submit_bio: type = NODE, io = no sync, sector = 499914752, size = 140K -> total 512K The first one is to write an indirect node block, and the others are to write direct node blocks. The reason why there are two separate bios for direct node blocks is: 0. initial state ------------------ ------------------ | | |xxxxxxxx | ------------------ ------------------ 1. write 368K ------------------ ------------------ | | |xxxxxxxxWWWWWWWW| ------------------ ------------------ 2. write 140K ------------------ ------------------ |WWWWWWW | |xxxxxxxxWWWWWWWW| ------------------ ------------------ This is because f2fs_write_node_pages tries to write just 512K totally, so that we can lose the chance to merge more bios nicely. After this patch is applied, we can get the following bio traces. f2fs_do_submit_bio: type = NODE, io = no sync, sector = 500103168, size = 8K f2fs_do_submit_bio: type = NODE, io = no sync, sector = 500111368, size = 4K f2fs_do_submit_bio: type = NODE, io = no sync, sector = 500107272, size = 512K f2fs_do_submit_bio: type = NODE, io = no sync, sector = 500108296, size = 512K f2fs_do_submit_bio: type = NODE, io = no sync, sector = 500109320, size = 500K And finally, we can improve the sequential write performance, from 458.775 MB/s to 479.945 MB/s on SSD. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c3c03c975bd6..51ef27894433 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1191,9 +1191,9 @@ static int f2fs_write_node_page(struct page *page, /* * It is very important to gather dirty pages and write at once, so that we can * submit a big bio without interfering other data writes. - * Be default, 512 pages (2MB), a segment size, is quite reasonable. + * Be default, 512 pages (2MB) * 3 node types, is more reasonable. */ -#define COLLECT_DIRTY_NODES 512 +#define COLLECT_DIRTY_NODES 1536 static int f2fs_write_node_pages(struct address_space *mapping, struct writeback_control *wbc) { @@ -1211,9 +1211,10 @@ static int f2fs_write_node_pages(struct address_space *mapping, return 0; /* if mounting is failed, skip writing node pages */ - wbc->nr_to_write = max_hw_blocks(sbi); + wbc->nr_to_write = 3 * max_hw_blocks(sbi); sync_node_pages(sbi, 0, wbc); - wbc->nr_to_write = nr_to_write - (max_hw_blocks(sbi) - wbc->nr_to_write); + wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) - + wbc->nr_to_write); return 0; } From a26b7c8a0149ce1e3b6a10f2801aada6e447e4e7 Mon Sep 17 00:00:00 2001 From: Jin Xu Date: Thu, 5 Sep 2013 12:45:26 +0800 Subject: [PATCH 40/40] f2fs: optimize gc for better performance This patch improves the gc efficiency by optimizing the victim selection policy. With this optimization, the random re-write performance could increase up to 20%. For f2fs, when disk is in shortage of free spaces, gc will selects dirty segments and moves valid blocks around for making more space available. The gc cost of a segment is determined by the valid blocks in the segment. The less the valid blocks, the higher the efficiency. The ideal victim segment is the one that has the most garbage blocks. Currently, it searches up to 20 dirty segments for a victim segment. The selected victim is not likely the best victim for gc when there are much more dirty segments. Why not searching more dirty segments for a better victim? The cost of searching dirty segments is negligible in comparison to moving blocks. In this patch, it enlarges the MAX_VICTIM_SEARCH to 4096 to make the search more aggressively for a possible better victim. Since it also applies to victim selection for SSR, it will likely improve the SSR efficiency as well. The test case is simple. It creates as many files until the disk full. The size for each file is 32KB. Then it writes as many as 100000 records of 4KB size to random offsets of random files in sync mode. The testing was done on a 2GB partition of a SDHC card. Let's see the test result of f2fs without and with the patch. --------------------------------------- 2GB partition, SDHC create 52023 files of size 32768 bytes random re-write 100000 records of 4KB --------------------------------------- | file creation (s) | rewrite time (s) | gc count | gc garbage blocks | [no patch] 341 4227 1174 174840 [patched] 324 2958 645 106682 It's obvious that, with the patch, f2fs finishes the test in 20+% less time than without the patch. And internally it does much less gc with higher efficiency than before. Since the performance improvement is related to gc, it might not be so obvious for other tests that do not trigger gc as often as this one ( This is because f2fs selects dirty segments for SSR use most of the time when free space is in shortage). The well-known iozone test tool was not used for benchmarking the patch becuase it seems do not have a test case that performs random re-write on a full disk. This patch is the revised version based on the suggestion from Jaegeuk Kim. Signed-off-by: Jin Xu [Jaegeuk Kim: suggested simpler solution] Reviewed-by: Jaegeuk Kim Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 8 +++++++- fs/f2fs/gc.h | 2 +- fs/f2fs/segment.h | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index eb89037e3312..2f157e883687 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -153,12 +153,18 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, if (p->alloc_mode == SSR) { p->gc_mode = GC_GREEDY; p->dirty_segmap = dirty_i->dirty_segmap[type]; + p->max_search = dirty_i->nr_dirty[type]; p->ofs_unit = 1; } else { p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; + p->max_search = dirty_i->nr_dirty[DIRTY]; p->ofs_unit = sbi->segs_per_sec; } + + if (p->max_search > MAX_VICTIM_SEARCH) + p->max_search = MAX_VICTIM_SEARCH; + p->offset = sbi->last_victim[p->gc_mode]; } @@ -305,7 +311,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (cost == max_cost) continue; - if (nsearched++ >= MAX_VICTIM_SEARCH) { + if (nsearched++ >= p.max_search) { sbi->last_victim[p.gc_mode] = segno; break; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index c22dee9f1422..507056d22205 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -20,7 +20,7 @@ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ /* Search max. number of dirty segments to select a victim segment */ -#define MAX_VICTIM_SEARCH 20 +#define MAX_VICTIM_SEARCH 4096 /* covers 8GB */ struct f2fs_gc_kthread { struct task_struct *f2fs_gc_task; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index e0d6d3abf396..bdd10eab8c40 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -142,6 +142,7 @@ struct victim_sel_policy { int alloc_mode; /* LFS or SSR */ int gc_mode; /* GC_CB or GC_GREEDY */ unsigned long *dirty_segmap; /* dirty segment bitmap */ + unsigned int max_search; /* maximum # of segments to search */ unsigned int offset; /* last scanned bitmap offset */ unsigned int ofs_unit; /* bitmap search unit */ unsigned int min_cost; /* minimum cost */