diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs new file mode 100644 index 000000000000..31942efcaf0e --- /dev/null +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -0,0 +1,26 @@ +What: /sys/fs/f2fs//gc_max_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the maximun sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_min_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the minimum sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_no_gc_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the default sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_idle +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the victim selection policy for garbage collection. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index b91e2f26b672..3cd27bed6349 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -18,8 +18,8 @@ according to its internal geometry or flash memory management scheme, namely FTL F2FS and its tools support various parameters not only for configuring on-disk layout, but also for selecting allocation and cleaning algorithms. -The file system formatting tool, "mkfs.f2fs", is available from the following -git tree: +The following git tree provides the file system formatting tool (mkfs.f2fs), +a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). >> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git For reporting bugs and sending patches, please use the following mailing list: @@ -132,6 +132,38 @@ f2fs. Each file shows the whole f2fs information. - average SIT information about whole segments - current memory footprint consumed by f2fs. +================================================================================ +SYSFS ENTRIES +================================================================================ + +Information about mounted f2f2 file systems can be found in +/sys/fs/f2fs. Each mounted filesystem will have a directory in +/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda). +The files in each per-device directory are shown in table below. + +Files in /sys/fs/f2fs/ +(see also Documentation/ABI/testing/sysfs-fs-f2fs) +.............................................................................. + File Content + + gc_max_sleep_time This tuning parameter controls the maximum sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_min_sleep_time This tuning parameter controls the minimum sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_no_gc_sleep_time This tuning parameter controls the default sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_idle This parameter controls the selection of victim + policy for garbage collection. Setting gc_idle = 0 + (default) will disable this option. Setting + gc_idle = 1 will select the Cost Benefit approach + & setting gc_idle = 2 will select the greedy aproach. + ================================================================================ USAGE ================================================================================ @@ -149,8 +181,12 @@ USAGE # mkfs.f2fs -l label /dev/block_device # mount -t f2fs /dev/block_device /mnt/f2fs -Format options --------------- +mkfs.f2fs +--------- +The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem, +which builds a basic on-disk layout. + +The options consist of: -l [label] : Give a volume label, up to 512 unicode name. -a [0 or 1] : Split start location of each area for heap-based allocation. 1 is set by default, which performs this. @@ -164,6 +200,37 @@ Format options -t [0 or 1] : Disable discard command or not. 1 is set by default, which conducts discard. +fsck.f2fs +--------- +The fsck.f2fs is a tool to check the consistency of an f2fs-formatted +partition, which examines whether the filesystem metadata and user-made data +are cross-referenced correctly or not. +Note that, initial version of the tool does not fix any inconsistency. + +The options consist of: + -d debug level [default:0] + +dump.f2fs +--------- +The dump.f2fs shows the information of specific inode and dumps SSA and SIT to +file. Each file is dump_ssa and dump_sit. + +The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. +It shows on-disk inode information reconized by a given inode number, and is +able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and +./dump_sit respectively. + +The options consist of: + -d debug level [default:0] + -i inode no (hex) + -s [SIT dump segno from #1~#2 (decimal), for all 0~-1] + -a [SSA dump segno from #1~#2 (decimal), for all 0~-1] + +Examples: +# dump.f2fs -i [ino] /dev/sdx +# dump.f2fs -s 0~-1 /dev/sdx (SIT dump) +# dump.f2fs -a 0~-1 /dev/sdx (SSA dump) + ================================================================================ DESIGN ================================================================================ diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 66a6b85a51d8..bb312201ca95 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -182,7 +182,7 @@ const struct address_space_operations f2fs_meta_aops = { .set_page_dirty = f2fs_set_meta_page_dirty, }; -int check_orphan_space(struct f2fs_sb_info *sbi) +int acquire_orphan_inode(struct f2fs_sb_info *sbi) { unsigned int max_orphans; int err = 0; @@ -197,10 +197,19 @@ int check_orphan_space(struct f2fs_sb_info *sbi) mutex_lock(&sbi->orphan_inode_mutex); if (sbi->n_orphans >= max_orphans) err = -ENOSPC; + else + sbi->n_orphans++; mutex_unlock(&sbi->orphan_inode_mutex); return err; } +void release_orphan_inode(struct f2fs_sb_info *sbi) +{ + mutex_lock(&sbi->orphan_inode_mutex); + sbi->n_orphans--; + mutex_unlock(&sbi->orphan_inode_mutex); +} + void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct list_head *head, *this; @@ -229,21 +238,18 @@ retry: list_add(&new->list, this->prev); else list_add_tail(&new->list, head); - - sbi->n_orphans++; out: mutex_unlock(&sbi->orphan_inode_mutex); } void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { - struct list_head *this, *next, *head; + struct list_head *head; struct orphan_inode_entry *orphan; mutex_lock(&sbi->orphan_inode_mutex); head = &sbi->orphan_inode_list; - list_for_each_safe(this, next, head) { - orphan = list_entry(this, struct orphan_inode_entry, list); + list_for_each_entry(orphan, head, list) { if (orphan->ino == ino) { list_del(&orphan->list); kmem_cache_free(orphan_entry_slab, orphan); @@ -373,7 +379,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp1; - pre_version = le64_to_cpu(cp_block->checkpoint_ver); + pre_version = cur_cp_version(cp_block); /* Read the 2nd cp block in this CP pack */ cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; @@ -388,7 +394,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp2; - cur_version = le64_to_cpu(cp_block->checkpoint_ver); + cur_version = cur_cp_version(cp_block); if (cur_version == pre_version) { *version = cur_version; @@ -793,7 +799,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) * Increase the version number so that * SIT entries and seg summaries are written at correct place */ - ckpt_ver = le64_to_cpu(ckpt->checkpoint_ver); + ckpt_ver = cur_cp_version(ckpt); ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); /* write cached NAT/SIT entries to NAT/SIT area */ diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 035f9a345cdf..941f9b9ca3a5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -37,9 +37,9 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr) struct page *node_page = dn->node_page; unsigned int ofs_in_node = dn->ofs_in_node; - wait_on_page_writeback(node_page); + f2fs_wait_on_page_writeback(node_page, NODE, false); - rn = (struct f2fs_node *)page_address(node_page); + rn = F2FS_NODE(node_page); /* Get physical address of data block */ addr_array = blkaddr_in_node(rn); @@ -117,7 +117,8 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) block_t start_blkaddr, end_blkaddr; BUG_ON(blk_addr == NEW_ADDR); - fofs = start_bidx_of_node(ofs_of_node(dn->node_page)) + dn->ofs_in_node; + fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + + dn->ofs_in_node; /* Update the page address in the parent node */ __set_data_blkaddr(dn, blk_addr); @@ -176,7 +177,6 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn) end_update: write_unlock(&fi->ext.ext_lock); sync_inode_page(dn); - return; } struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) @@ -260,8 +260,17 @@ repeat: if (PageUptodate(page)) return page; - BUG_ON(dn.data_blkaddr == NEW_ADDR); - BUG_ON(dn.data_blkaddr == NULL_ADDR); + /* + * A new dentry page is allocated but not able to be written, since its + * new inode page couldn't be allocated due to -ENOSPC. + * In such the case, its blkaddr can be remained as NEW_ADDR. + * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata. + */ + if (dn.data_blkaddr == NEW_ADDR) { + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + return page; + } err = f2fs_readpage(sbi, page, dn.data_blkaddr, READ_SYNC); if (err) @@ -365,7 +374,6 @@ static void read_end_io(struct bio *bio, int err) } unlock_page(page); } while (bvec >= bio->bi_io_vec); - kfree(bio->bi_private); bio_put(bio); } @@ -391,7 +399,6 @@ int f2fs_readpage(struct f2fs_sb_info *sbi, struct page *page, bio->bi_end_io = read_end_io; if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { - kfree(bio->bi_private); bio_put(bio); up_read(&sbi->bio_sem); f2fs_put_page(page, 1); @@ -442,7 +449,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock, unsigned int end_offset; end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE : + ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; clear_buffer_new(bh_result); @@ -636,9 +643,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, int err = 0; int ilock; - /* for nobh_write_end */ - *fsdata = NULL; - f2fs_balance_fs(sbi); repeat: page = grab_cache_page_write_begin(mapping, index, flags); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0d6c6aafb235..a84b0a8e6854 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -29,7 +29,7 @@ static DEFINE_MUTEX(f2fs_stat_mutex); static void update_general_status(struct f2fs_sb_info *sbi) { - struct f2fs_stat_info *si = sbi->stat_info; + struct f2fs_stat_info *si = F2FS_STAT(sbi); int i; /* valid check of the segment numbers */ @@ -83,7 +83,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) */ static void update_sit_info(struct f2fs_sb_info *sbi) { - struct f2fs_stat_info *si = sbi->stat_info; + struct f2fs_stat_info *si = F2FS_STAT(sbi); unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; struct sit_info *sit_i = SIT_I(sbi); unsigned int segno, vblocks; @@ -118,7 +118,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi) */ static void update_mem_info(struct f2fs_sb_info *sbi) { - struct f2fs_stat_info *si = sbi->stat_info; + struct f2fs_stat_info *si = F2FS_STAT(sbi); unsigned npages; if (si->base_mem) @@ -253,21 +253,21 @@ static int stat_show(struct seq_file *s, void *v) si->nats, NM_WOUT_THRESHOLD); seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", si->sits, si->fnids); - seq_printf(s, "\nDistribution of User Blocks:"); - seq_printf(s, " [ valid | invalid | free ]\n"); - seq_printf(s, " ["); + seq_puts(s, "\nDistribution of User Blocks:"); + seq_puts(s, " [ valid | invalid | free ]\n"); + seq_puts(s, " ["); for (j = 0; j < si->util_valid; j++) - seq_printf(s, "-"); - seq_printf(s, "|"); + seq_putc(s, '-'); + seq_putc(s, '|'); for (j = 0; j < si->util_invalid; j++) - seq_printf(s, "-"); - seq_printf(s, "|"); + seq_putc(s, '-'); + seq_putc(s, '|'); for (j = 0; j < si->util_free; j++) - seq_printf(s, "-"); - seq_printf(s, "]\n\n"); + seq_putc(s, '-'); + seq_puts(s, "]\n\n"); seq_printf(s, "SSR: %u blocks in %u segments\n", si->block_count[SSR], si->segment_count[SSR]); seq_printf(s, "LFS: %u blocks in %u segments\n", @@ -305,11 +305,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; - sbi->stat_info = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL); - if (!sbi->stat_info) + si = kzalloc(sizeof(struct f2fs_stat_info), GFP_KERNEL); + if (!si) return -ENOMEM; - si = sbi->stat_info; si->all_area_segs = le32_to_cpu(raw_super->segment_count); si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); @@ -319,6 +318,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) si->main_area_zones = si->main_area_sections / le32_to_cpu(raw_super->secs_per_zone); si->sbi = sbi; + sbi->stat_info = si; mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); @@ -329,13 +329,13 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { - struct f2fs_stat_info *si = sbi->stat_info; + struct f2fs_stat_info *si = F2FS_STAT(sbi); mutex_lock(&f2fs_stat_mutex); list_del(&si->stat_list); mutex_unlock(&f2fs_stat_mutex); - kfree(sbi->stat_info); + kfree(si); } void __init f2fs_create_root_stats(void) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 62f0d5977c64..384c6daf9a89 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -270,12 +270,27 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) struct f2fs_node *rn; /* copy name info. to this inode page */ - rn = (struct f2fs_node *)page_address(ipage); + rn = F2FS_NODE(ipage); rn->i.i_namelen = cpu_to_le32(name->len); memcpy(rn->i.i_name, name->name, name->len); set_page_dirty(ipage); } +int update_dent_inode(struct inode *inode, const struct qstr *name) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *page; + + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) + return PTR_ERR(page); + + init_dent_inode(name, page); + f2fs_put_page(page, 1); + + return 0; +} + static int make_empty_dir(struct inode *inode, struct inode *parent, struct page *page) { @@ -557,6 +572,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, if (inode->i_nlink == 0) add_orphan_inode(sbi, inode->i_ino); + else + release_orphan_inode(sbi); } if (bit_pos == NR_DENTRY_IN_BLOCK) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 467d42d65c48..608f0df5b919 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -17,6 +17,7 @@ #include #include #include +#include /* * For mount options @@ -28,6 +29,7 @@ #define F2FS_MOUNT_XATTR_USER 0x00000010 #define F2FS_MOUNT_POSIX_ACL 0x00000020 #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 +#define F2FS_MOUNT_INLINE_XATTR 0x00000080 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -134,11 +136,13 @@ static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) /* * For INODE and NODE manager */ -#define XATTR_NODE_OFFSET (-1) /* - * store xattrs to one node block per - * file keeping -1 as its node offset to - * distinguish from index node blocks. - */ +/* + * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1 + * as its node offset to distinguish from index node blocks. + * But some bits are used to mark the node block. + */ +#define XATTR_NODE_OFFSET ((((unsigned int)-1) << OFFSET_BIT_SHIFT) \ + >> OFFSET_BIT_SHIFT) enum { ALLOC_NODE, /* allocate a new node page if needed */ LOOKUP_NODE, /* look up a node without readahead */ @@ -178,6 +182,7 @@ struct f2fs_inode_info { f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ nid_t i_xattr_nid; /* node id that contains xattrs */ + unsigned long long xattr_ver; /* cp version of xattr modification */ struct extent_info ext; /* in-memory extent cache entry */ }; @@ -295,15 +300,6 @@ struct f2fs_sm_info { unsigned int ovp_segments; /* # of overprovision segments */ }; -/* - * For directory operation - */ -#define NODE_DIR1_BLOCK (ADDRS_PER_INODE + 1) -#define NODE_DIR2_BLOCK (ADDRS_PER_INODE + 2) -#define NODE_IND1_BLOCK (ADDRS_PER_INODE + 3) -#define NODE_IND2_BLOCK (ADDRS_PER_INODE + 4) -#define NODE_DIND_BLOCK (ADDRS_PER_INODE + 5) - /* * For superblock */ @@ -350,6 +346,7 @@ enum page_type { struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ + struct proc_dir_entry *s_proc; /* proc entry */ struct buffer_head *raw_super_buf; /* buffer head of raw sb */ struct f2fs_super_block *raw_super; /* raw super block pointer */ int s_dirty; /* dirty flag for checkpoint */ @@ -429,6 +426,10 @@ struct f2fs_sb_info { #endif unsigned int last_victim[2]; /* last victim segment # */ spinlock_t stat_lock; /* lock for stat operations */ + + /* For sysfs suppport */ + struct kobject s_kobj; + struct completion s_kobj_unregister; }; /* @@ -454,6 +455,11 @@ static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) return (struct f2fs_checkpoint *)(sbi->ckpt); } +static inline struct f2fs_node *F2FS_NODE(struct page *page) +{ + return (struct f2fs_node *)page_address(page); +} + static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) { return (struct f2fs_nm_info *)(sbi->nm_info); @@ -489,6 +495,11 @@ static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) sbi->s_dirty = 0; } +static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) +{ + return le64_to_cpu(cp->checkpoint_ver); +} + static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) { unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); @@ -677,7 +688,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) { block_t start_addr; struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver); + unsigned long long ckpt_version = cur_cp_version(ckpt); start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); @@ -812,7 +823,7 @@ static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, static inline bool IS_INODE(struct page *page) { - struct f2fs_node *p = (struct f2fs_node *)page_address(page); + struct f2fs_node *p = F2FS_NODE(page); return RAW_IS_INODE(p); } @@ -826,7 +837,7 @@ static inline block_t datablock_addr(struct page *node_page, { struct f2fs_node *raw_node; __le32 *addr_array; - raw_node = (struct f2fs_node *)page_address(node_page); + raw_node = F2FS_NODE(node_page); addr_array = blkaddr_in_node(raw_node); return le32_to_cpu(addr_array[offset]); } @@ -873,6 +884,7 @@ enum { FI_NO_ALLOC, /* should not allocate any blocks */ FI_UPDATE_DIR, /* should update inode block for consistency */ FI_DELAY_IPUT, /* used for the recovery */ + FI_INLINE_XATTR, /* used for inline xattr */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -905,6 +917,45 @@ static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) return 0; } +static inline void get_inline_info(struct f2fs_inode_info *fi, + struct f2fs_inode *ri) +{ + if (ri->i_inline & F2FS_INLINE_XATTR) + set_inode_flag(fi, FI_INLINE_XATTR); +} + +static inline void set_raw_inline(struct f2fs_inode_info *fi, + struct f2fs_inode *ri) +{ + ri->i_inline = 0; + + if (is_inode_flag_set(fi, FI_INLINE_XATTR)) + ri->i_inline |= F2FS_INLINE_XATTR; +} + +static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) +{ + if (is_inode_flag_set(fi, FI_INLINE_XATTR)) + return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; + return DEF_ADDRS_PER_INODE; +} + +static inline void *inline_xattr_addr(struct page *page) +{ + struct f2fs_inode *ri; + ri = (struct f2fs_inode *)page_address(page); + return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - + F2FS_INLINE_XATTR_ADDRS]); +} + +static inline int inline_xattr_size(struct inode *inode) +{ + if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) + return F2FS_INLINE_XATTR_ADDRS << 2; + else + return 0; +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; @@ -947,6 +998,7 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); +int update_dent_inode(struct inode *, const struct qstr *); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); int f2fs_make_empty(struct inode *, struct inode *); @@ -980,6 +1032,7 @@ int is_checkpointed_node(struct f2fs_sb_info *, nid_t); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); +int truncate_xattr_node(struct inode *, struct page *); int remove_inode_page(struct inode *); struct page *new_inode_page(struct inode *, const struct qstr *); struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); @@ -1012,7 +1065,8 @@ int npages_for_summary_flush(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); struct bio *f2fs_bio_alloc(struct block_device *, int); -void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync); +void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool); +void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool); void write_meta_page(struct f2fs_sb_info *, struct page *); void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, block_t, block_t *); @@ -1037,7 +1091,8 @@ void destroy_segment_manager(struct f2fs_sb_info *); struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); -int check_orphan_space(struct f2fs_sb_info *); +int acquire_orphan_inode(struct f2fs_sb_info *); +void release_orphan_inode(struct f2fs_sb_info *); void add_orphan_inode(struct f2fs_sb_info *, nid_t); void remove_orphan_inode(struct f2fs_sb_info *, nid_t); int recover_orphan_inodes(struct f2fs_sb_info *); @@ -1068,7 +1123,7 @@ int do_write_data_page(struct page *); */ int start_gc_thread(struct f2fs_sb_info *); void stop_gc_thread(struct f2fs_sb_info *); -block_t start_bidx_of_node(unsigned int); +block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *); int f2fs_gc(struct f2fs_sb_info *); void build_gc_manager(struct f2fs_sb_info *); int __init create_gc_caches(void); @@ -1112,11 +1167,16 @@ struct f2fs_stat_info { unsigned base_mem, cache_mem; }; +static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_stat_info*)sbi->stat_info; +} + #define stat_inc_call_count(si) ((si)->call_count++) #define stat_inc_seg_count(sbi, type) \ do { \ - struct f2fs_stat_info *si = sbi->stat_info; \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ (si)->tot_segs++; \ if (type == SUM_TYPE_DATA) \ si->data_segs++; \ @@ -1129,14 +1189,14 @@ struct f2fs_stat_info { #define stat_inc_data_blk_count(sbi, blks) \ do { \ - struct f2fs_stat_info *si = sbi->stat_info; \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->data_blks += (blks); \ } while (0) #define stat_inc_node_blk_count(sbi, blks) \ do { \ - struct f2fs_stat_info *si = sbi->stat_info; \ + struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->node_blks += (blks); \ } while (0) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d2d2b7dbdcc1..02c906971cc6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -112,11 +112,13 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) if (!dentry) return 0; - inode = igrab(dentry->d_parent->d_inode); - dput(dentry); + if (update_dent_inode(inode, &dentry->d_name)) { + dput(dentry); + return 0; + } - *pino = inode->i_ino; - iput(inode); + *pino = parent_ino(dentry); + dput(dentry); return 1; } @@ -147,9 +149,10 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) mutex_lock(&inode->i_mutex); - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - goto out; - + /* + * Both of fdatasync() and fsync() are able to be recovered from + * sudden-power-off. + */ if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; else if (file_wrong_pino(inode)) @@ -158,10 +161,14 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) need_cp = true; else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) need_cp = true; + else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) + need_cp = true; if (need_cp) { nid_t pino; + F2FS_I(inode)->xattr_ver = 0; + /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); if (file_wrong_pino(inode) && inode->i_nlink == 1 && @@ -205,7 +212,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) struct f2fs_node *raw_node; __le32 *addr; - raw_node = page_address(dn->node_page); + raw_node = F2FS_NODE(dn->node_page); addr = blkaddr_in_node(raw_node) + ofs; for ( ; count > 0; count--, addr++, dn->ofs_in_node++) { @@ -283,7 +290,7 @@ static int truncate_blocks(struct inode *inode, u64 from) } if (IS_INODE(dn.node_page)) - count = ADDRS_PER_INODE; + count = ADDRS_PER_INODE(F2FS_I(inode)); else count = ADDRS_PER_BLOCK; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 35f9b1a196aa..2f157e883687 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -29,10 +29,11 @@ static struct kmem_cache *winode_slab; static int gc_thread_func(void *data) { struct f2fs_sb_info *sbi = data; + struct f2fs_gc_kthread *gc_th = sbi->gc_thread; wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; long wait_ms; - wait_ms = GC_THREAD_MIN_SLEEP_TIME; + wait_ms = gc_th->min_sleep_time; do { if (try_to_freeze()) @@ -45,7 +46,7 @@ static int gc_thread_func(void *data) break; if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { - wait_ms = GC_THREAD_MAX_SLEEP_TIME; + wait_ms = increase_sleep_time(gc_th, wait_ms); continue; } @@ -66,15 +67,15 @@ static int gc_thread_func(void *data) continue; if (!is_idle(sbi)) { - wait_ms = increase_sleep_time(wait_ms); + wait_ms = increase_sleep_time(gc_th, wait_ms); mutex_unlock(&sbi->gc_mutex); continue; } if (has_enough_invalid_blocks(sbi)) - wait_ms = decrease_sleep_time(wait_ms); + wait_ms = decrease_sleep_time(gc_th, wait_ms); else - wait_ms = increase_sleep_time(wait_ms); + wait_ms = increase_sleep_time(gc_th, wait_ms); #ifdef CONFIG_F2FS_STAT_FS sbi->bg_gc++; @@ -82,7 +83,7 @@ static int gc_thread_func(void *data) /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi)) - wait_ms = GC_THREAD_NOGC_SLEEP_TIME; + wait_ms = gc_th->no_gc_sleep_time; } while (!kthread_should_stop()); return 0; } @@ -101,6 +102,12 @@ int start_gc_thread(struct f2fs_sb_info *sbi) goto out; } + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + + gc_th->gc_idle = 0; + sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, @@ -125,9 +132,17 @@ void stop_gc_thread(struct f2fs_sb_info *sbi) sbi->gc_thread = NULL; } -static int select_gc_type(int gc_type) +static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) { - return (gc_type == BG_GC) ? GC_CB : GC_GREEDY; + int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; + + if (gc_th && gc_th->gc_idle) { + if (gc_th->gc_idle == 1) + gc_mode = GC_CB; + else if (gc_th->gc_idle == 2) + gc_mode = GC_GREEDY; + } + return gc_mode; } static void select_policy(struct f2fs_sb_info *sbi, int gc_type, @@ -138,12 +153,18 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, if (p->alloc_mode == SSR) { p->gc_mode = GC_GREEDY; p->dirty_segmap = dirty_i->dirty_segmap[type]; + p->max_search = dirty_i->nr_dirty[type]; p->ofs_unit = 1; } else { - p->gc_mode = select_gc_type(gc_type); + p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; + p->max_search = dirty_i->nr_dirty[DIRTY]; p->ofs_unit = sbi->segs_per_sec; } + + if (p->max_search > MAX_VICTIM_SEARCH) + p->max_search = MAX_VICTIM_SEARCH; + p->offset = sbi->last_victim[p->gc_mode]; } @@ -290,7 +311,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (cost == max_cost) continue; - if (nsearched++ >= MAX_VICTIM_SEARCH) { + if (nsearched++ >= p.max_search) { sbi->last_victim[p.gc_mode] = segno; break; } @@ -407,8 +428,7 @@ next_step: /* set page dirty and write it */ if (gc_type == FG_GC) { - f2fs_submit_bio(sbi, NODE, true); - wait_on_page_writeback(node_page); + f2fs_wait_on_page_writeback(node_page, NODE, true); set_page_dirty(node_page); } else { if (!PageWriteback(node_page)) @@ -447,7 +467,7 @@ next_step: * as indirect or double indirect node blocks, are given, it must be a caller's * bug. */ -block_t start_bidx_of_node(unsigned int node_ofs) +block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi) { unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4; unsigned int bidx; @@ -464,7 +484,7 @@ block_t start_bidx_of_node(unsigned int node_ofs) int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1); bidx = node_ofs - 5 - dec; } - return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE; + return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi); } static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, @@ -508,10 +528,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) } else { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - if (PageWriteback(page)) { - f2fs_submit_bio(sbi, DATA, true); - wait_on_page_writeback(page); - } + f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page) && S_ISDIR(inode->i_mode)) { @@ -575,7 +592,6 @@ next_step: continue; } - start_bidx = start_bidx_of_node(nofs); ofs_in_node = le16_to_cpu(entry->ofs_in_node); if (phase == 2) { @@ -583,6 +599,8 @@ next_step: if (IS_ERR(inode)) continue; + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + data_page = find_data_page(inode, start_bidx + ofs_in_node, false); if (IS_ERR(data_page)) @@ -593,6 +611,8 @@ next_step: } else { inode = find_gc_inode(dni.ino, ilist); if (inode) { + start_bidx = start_bidx_of_node(nofs, + F2FS_I(inode)); data_page = get_lock_data_page(inode, start_bidx + ofs_in_node); if (IS_ERR(data_page)) diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 2c6a6bd08322..507056d22205 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -13,18 +13,26 @@ * whether IO subsystem is idle * or not */ -#define GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ -#define GC_THREAD_MAX_SLEEP_TIME 60000 -#define GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ +#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ +#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 +#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ /* Search max. number of dirty segments to select a victim segment */ -#define MAX_VICTIM_SEARCH 20 +#define MAX_VICTIM_SEARCH 4096 /* covers 8GB */ struct f2fs_gc_kthread { struct task_struct *f2fs_gc_task; wait_queue_head_t gc_wait_queue_head; + + /* for gc sleep time */ + unsigned int min_sleep_time; + unsigned int max_sleep_time; + unsigned int no_gc_sleep_time; + + /* for changing gc mode */ + unsigned int gc_idle; }; struct inode_entry { @@ -56,25 +64,25 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; } -static inline long increase_sleep_time(long wait) +static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) { - if (wait == GC_THREAD_NOGC_SLEEP_TIME) + if (wait == gc_th->no_gc_sleep_time) return wait; - wait += GC_THREAD_MIN_SLEEP_TIME; - if (wait > GC_THREAD_MAX_SLEEP_TIME) - wait = GC_THREAD_MAX_SLEEP_TIME; + wait += gc_th->min_sleep_time; + if (wait > gc_th->max_sleep_time) + wait = gc_th->max_sleep_time; return wait; } -static inline long decrease_sleep_time(long wait) +static inline long decrease_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) { - if (wait == GC_THREAD_NOGC_SLEEP_TIME) - wait = GC_THREAD_MAX_SLEEP_TIME; + if (wait == gc_th->no_gc_sleep_time) + wait = gc_th->max_sleep_time; - wait -= GC_THREAD_MIN_SLEEP_TIME; - if (wait <= GC_THREAD_MIN_SLEEP_TIME) - wait = GC_THREAD_MIN_SLEEP_TIME; + wait -= gc_th->min_sleep_time; + if (wait <= gc_th->min_sleep_time) + wait = gc_th->min_sleep_time; return wait; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2b2d45d19e3e..9339cd292047 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -56,7 +56,7 @@ static int do_read_inode(struct inode *inode) if (IS_ERR(node_page)) return PTR_ERR(node_page); - rn = page_address(node_page); + rn = F2FS_NODE(node_page); ri = &(rn->i); inode->i_mode = le16_to_cpu(ri->i_mode); @@ -85,6 +85,7 @@ static int do_read_inode(struct inode *inode) fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); get_extent_info(&fi->ext, ri->i_ext); + get_inline_info(fi, ri); f2fs_put_page(node_page, 1); return 0; } @@ -151,9 +152,9 @@ void update_inode(struct inode *inode, struct page *node_page) struct f2fs_node *rn; struct f2fs_inode *ri; - wait_on_page_writeback(node_page); + f2fs_wait_on_page_writeback(node_page, NODE, false); - rn = page_address(node_page); + rn = F2FS_NODE(node_page); ri = &(rn->i); ri->i_mode = cpu_to_le16(inode->i_mode); @@ -164,6 +165,7 @@ void update_inode(struct inode *inode, struct page *node_page) ri->i_size = cpu_to_le64(i_size_read(inode)); ri->i_blocks = cpu_to_le64(inode->i_blocks); set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext); + set_raw_inline(F2FS_I(inode), ri); ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); @@ -221,9 +223,6 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) return 0; - if (wbc) - f2fs_balance_fs(sbi); - /* * We need to lock here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. @@ -231,6 +230,10 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) ilock = mutex_lock_op(sbi); ret = update_inode_page(inode); mutex_unlock_op(sbi, ilock); + + if (wbc) + f2fs_balance_fs(sbi); + return ret; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 64c07169df05..2a5359c990fc 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -83,21 +83,11 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) { size_t slen = strlen(s); size_t sublen = strlen(sub); - int ret; if (sublen > slen) return 0; - ret = memcmp(s + slen - sublen, sub, sublen); - if (ret) { /* compare upper case */ - int i; - char upper_sub[8]; - for (i = 0; i < sublen && i < sizeof(upper_sub); i++) - upper_sub[i] = toupper(sub[i]); - return !memcmp(s + slen - sublen, upper_sub, sublen); - } - - return !ret; + return !strncasecmp(s + slen - sublen, sub, sublen); } /* @@ -239,7 +229,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (!de) goto fail; - err = check_orphan_space(sbi); + err = acquire_orphan_inode(sbi); if (err) { kunmap(page); f2fs_put_page(page, 0); @@ -393,7 +383,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode = old_dentry->d_inode; struct inode *new_inode = new_dentry->d_inode; struct page *old_dir_page; - struct page *old_page; + struct page *old_page, *new_page; struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; @@ -415,7 +405,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, ilock = mutex_lock_op(sbi); if (new_inode) { - struct page *new_page; err = -ENOTEMPTY; if (old_dir_entry && !f2fs_empty_dir(new_inode)) @@ -427,14 +416,28 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_entry) goto out_dir; + err = acquire_orphan_inode(sbi); + if (err) + goto put_out_dir; + + if (update_dent_inode(old_inode, &new_dentry->d_name)) { + release_orphan_inode(sbi); + goto put_out_dir; + } + f2fs_set_link(new_dir, new_entry, new_page, old_inode); new_inode->i_ctime = CURRENT_TIME; if (old_dir_entry) drop_nlink(new_inode); drop_nlink(new_inode); + if (!new_inode->i_nlink) add_orphan_inode(sbi, new_inode->i_ino); + else + release_orphan_inode(sbi); + + update_inode_page(old_inode); update_inode_page(new_inode); } else { err = f2fs_add_link(new_dentry, old_inode); @@ -467,6 +470,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, mutex_unlock_op(sbi, ilock); return 0; +put_out_dir: + f2fs_put_page(new_page, 1); out_dir: if (old_dir_entry) { kunmap(old_dir_page); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b418aee09573..51ef27894433 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -315,9 +315,10 @@ cache: * The maximum depth is four. * Offset[0] will have raw inode offset. */ -static int get_node_path(long block, int offset[4], unsigned int noffset[4]) +static int get_node_path(struct f2fs_inode_info *fi, long block, + int offset[4], unsigned int noffset[4]) { - const long direct_index = ADDRS_PER_INODE; + const long direct_index = ADDRS_PER_INODE(fi); const long direct_blks = ADDRS_PER_BLOCK; const long dptrs_per_blk = NIDS_PER_BLOCK; const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK; @@ -405,7 +406,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) int level, i; int err = 0; - level = get_node_path(index, offset, noffset); + level = get_node_path(F2FS_I(dn->inode), index, offset, noffset); nids[0] = dn->inode->i_ino; npage[0] = dn->inode_page; @@ -565,7 +566,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, return PTR_ERR(page); } - rn = (struct f2fs_node *)page_address(page); + rn = F2FS_NODE(page); if (depth < 3) { for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { child_nid = le32_to_cpu(rn->in.nid[i]); @@ -687,7 +688,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) trace_f2fs_truncate_inode_blocks_enter(inode, from); - level = get_node_path(from, offset, noffset); + level = get_node_path(F2FS_I(inode), from, offset, noffset); restart: page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { @@ -698,7 +699,7 @@ restart: set_new_dnode(&dn, inode, page, NULL, 0); unlock_page(page); - rn = page_address(page); + rn = F2FS_NODE(page); switch (level) { case 0: case 1: @@ -771,6 +772,33 @@ fail: return err > 0 ? 0 : err; } +int truncate_xattr_node(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + nid_t nid = F2FS_I(inode)->i_xattr_nid; + struct dnode_of_data dn; + struct page *npage; + + if (!nid) + return 0; + + npage = get_node_page(sbi, nid); + if (IS_ERR(npage)) + return PTR_ERR(npage); + + F2FS_I(inode)->i_xattr_nid = 0; + + /* need to do checkpoint during fsync */ + F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); + + set_new_dnode(&dn, inode, page, npage, nid); + + if (page) + dn.inode_page_locked = 1; + truncate_node(&dn); + return 0; +} + /* * Caller should grab and release a mutex by calling mutex_lock_op() and * mutex_unlock_op(). @@ -781,22 +809,16 @@ int remove_inode_page(struct inode *inode) struct page *page; nid_t ino = inode->i_ino; struct dnode_of_data dn; + int err; page = get_node_page(sbi, ino); if (IS_ERR(page)) return PTR_ERR(page); - if (F2FS_I(inode)->i_xattr_nid) { - nid_t nid = F2FS_I(inode)->i_xattr_nid; - struct page *npage = get_node_page(sbi, nid); - - if (IS_ERR(npage)) - return PTR_ERR(npage); - - F2FS_I(inode)->i_xattr_nid = 0; - set_new_dnode(&dn, inode, page, npage, nid); - dn.inode_page_locked = 1; - truncate_node(&dn); + err = truncate_xattr_node(inode, page); + if (err) { + f2fs_put_page(page, 1); + return err; } /* 0 is possible, after f2fs_new_inode() is failed */ @@ -833,29 +855,32 @@ struct page *new_node_page(struct dnode_of_data *dn, if (!page) return ERR_PTR(-ENOMEM); - get_node_info(sbi, dn->nid, &old_ni); + if (!inc_valid_node_count(sbi, dn->inode, 1)) { + err = -ENOSPC; + goto fail; + } - SetPageUptodate(page); - fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); + get_node_info(sbi, dn->nid, &old_ni); /* Reinitialize old_ni with new node page */ BUG_ON(old_ni.blk_addr != NULL_ADDR); new_ni = old_ni; new_ni.ino = dn->inode->i_ino; - - if (!inc_valid_node_count(sbi, dn->inode, 1)) { - err = -ENOSPC; - goto fail; - } set_node_addr(sbi, &new_ni, NEW_ADDR); + + fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(dn->inode, page); + SetPageUptodate(page); + set_page_dirty(page); + + if (ofs == XATTR_NODE_OFFSET) + F2FS_I(dn->inode)->i_xattr_nid = dn->nid; dn->node_page = page; if (ipage) update_inode(dn->inode, ipage); else sync_inode_page(dn); - set_page_dirty(page); if (ofs == 0) inc_valid_inode_count(sbi); @@ -916,7 +941,6 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) f2fs_put_page(apage, 0); else if (err == LOCKED_PAGE) f2fs_put_page(apage, 1); - return; } struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) @@ -1167,9 +1191,9 @@ static int f2fs_write_node_page(struct page *page, /* * It is very important to gather dirty pages and write at once, so that we can * submit a big bio without interfering other data writes. - * Be default, 512 pages (2MB), a segment size, is quite reasonable. + * Be default, 512 pages (2MB) * 3 node types, is more reasonable. */ -#define COLLECT_DIRTY_NODES 512 +#define COLLECT_DIRTY_NODES 1536 static int f2fs_write_node_pages(struct address_space *mapping, struct writeback_control *wbc) { @@ -1187,9 +1211,10 @@ static int f2fs_write_node_pages(struct address_space *mapping, return 0; /* if mounting is failed, skip writing node pages */ - wbc->nr_to_write = max_hw_blocks(sbi); + wbc->nr_to_write = 3 * max_hw_blocks(sbi); sync_node_pages(sbi, 0, wbc); - wbc->nr_to_write = nr_to_write - (max_hw_blocks(sbi) - wbc->nr_to_write); + wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) - + wbc->nr_to_write); return 0; } @@ -1444,6 +1469,9 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; + if (!nid) + return; + spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); BUG_ON(!i || i->state != NID_ALLOC); @@ -1484,8 +1512,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); - src = (struct f2fs_node *)page_address(page); - dst = (struct f2fs_node *)page_address(ipage); + src = F2FS_NODE(page); + dst = F2FS_NODE(ipage); memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i); dst->i.i_size = 0; @@ -1515,8 +1543,8 @@ int restore_node_summary(struct f2fs_sb_info *sbi, /* alloc temporal page for read node */ page = alloc_page(GFP_NOFS | __GFP_ZERO); - if (IS_ERR(page)) - return PTR_ERR(page); + if (!page) + return -ENOMEM; lock_page(page); /* scan the node segment */ @@ -1535,7 +1563,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi, goto out; lock_page(page); - rn = (struct f2fs_node *)page_address(page); + rn = F2FS_NODE(page); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index c65fb4f4230f..3496bb3e15dc 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -155,8 +155,7 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) static inline void fill_node_footer(struct page *page, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(page); if (reset) memset(rn, 0, sizeof(*rn)); rn->footer.nid = cpu_to_le32(nid); @@ -166,10 +165,8 @@ static inline void fill_node_footer(struct page *page, nid_t nid, static inline void copy_node_footer(struct page *dst, struct page *src) { - void *src_addr = page_address(src); - void *dst_addr = page_address(dst); - struct f2fs_node *src_rn = (struct f2fs_node *)src_addr; - struct f2fs_node *dst_rn = (struct f2fs_node *)dst_addr; + struct f2fs_node *src_rn = F2FS_NODE(src); + struct f2fs_node *dst_rn = F2FS_NODE(dst); memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); } @@ -177,45 +174,40 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(page); + rn->footer.cp_ver = ckpt->checkpoint_ver; rn->footer.next_blkaddr = cpu_to_le32(blkaddr); } static inline nid_t ino_of_node(struct page *node_page) { - void *kaddr = page_address(node_page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(node_page); return le32_to_cpu(rn->footer.ino); } static inline nid_t nid_of_node(struct page *node_page) { - void *kaddr = page_address(node_page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(node_page); return le32_to_cpu(rn->footer.nid); } static inline unsigned int ofs_of_node(struct page *node_page) { - void *kaddr = page_address(node_page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(node_page); unsigned flag = le32_to_cpu(rn->footer.flag); return flag >> OFFSET_BIT_SHIFT; } static inline unsigned long long cpver_of_node(struct page *node_page) { - void *kaddr = page_address(node_page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(node_page); return le64_to_cpu(rn->footer.cp_ver); } static inline block_t next_blkaddr_of_node(struct page *node_page) { - void *kaddr = page_address(node_page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(node_page); return le32_to_cpu(rn->footer.next_blkaddr); } @@ -237,6 +229,10 @@ static inline block_t next_blkaddr_of_node(struct page *node_page) static inline bool IS_DNODE(struct page *node_page) { unsigned int ofs = ofs_of_node(node_page); + + if (ofs == XATTR_NODE_OFFSET) + return false; + if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || ofs == 5 + 2 * NIDS_PER_BLOCK) return false; @@ -250,7 +246,7 @@ static inline bool IS_DNODE(struct page *node_page) static inline void set_nid(struct page *p, int off, nid_t nid, bool i) { - struct f2fs_node *rn = (struct f2fs_node *)page_address(p); + struct f2fs_node *rn = F2FS_NODE(p); wait_on_page_writeback(p); @@ -263,7 +259,8 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i) static inline nid_t get_nid(struct page *p, int off, bool i) { - struct f2fs_node *rn = (struct f2fs_node *)page_address(p); + struct f2fs_node *rn = F2FS_NODE(p); + if (i) return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); return le32_to_cpu(rn->in.nid[off]); @@ -314,8 +311,7 @@ static inline void clear_cold_data(struct page *page) static inline int is_node(struct page *page, int type) { - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = F2FS_NODE(page); return le32_to_cpu(rn->footer.flag) & (1 << type); } @@ -325,7 +321,7 @@ static inline int is_node(struct page *page, int type) static inline void set_cold_node(struct inode *inode, struct page *page) { - struct f2fs_node *rn = (struct f2fs_node *)page_address(page); + struct f2fs_node *rn = F2FS_NODE(page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (S_ISDIR(inode->i_mode)) @@ -337,7 +333,7 @@ static inline void set_cold_node(struct inode *inode, struct page *page) static inline void set_mark(struct page *page, int mark, int type) { - struct f2fs_node *rn = (struct f2fs_node *)page_address(page); + struct f2fs_node *rn = F2FS_NODE(page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) flag |= (0x1 << type); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index d56d951c2253..51ef5eec33d7 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -40,8 +40,7 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, static int recover_dentry(struct page *ipage, struct inode *inode) { - void *kaddr = page_address(ipage); - struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; + struct f2fs_node *raw_node = F2FS_NODE(ipage); struct f2fs_inode *raw_inode = &(raw_node->i); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; @@ -93,8 +92,7 @@ out: static int recover_inode(struct inode *inode, struct page *node_page) { - void *kaddr = page_address(node_page); - struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; + struct f2fs_node *raw_node = F2FS_NODE(node_page); struct f2fs_inode *raw_inode = &(raw_node->i); if (!IS_INODE(node_page)) @@ -119,7 +117,7 @@ static int recover_inode(struct inode *inode, struct page *node_page) static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) { - unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver); + unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); struct curseg_info *curseg; struct page *page; block_t blkaddr; @@ -131,8 +129,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) /* read node page */ page = alloc_page(GFP_F2FS_ZERO); - if (IS_ERR(page)) - return PTR_ERR(page); + if (!page) + return -ENOMEM; lock_page(page); while (1) { @@ -215,6 +213,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, void *kaddr; struct inode *inode; struct page *node_page; + unsigned int offset; block_t bidx; int i; @@ -259,8 +258,8 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, node_page = get_node_page(sbi, nid); if (IS_ERR(node_page)) return PTR_ERR(node_page); - bidx = start_bidx_of_node(ofs_of_node(node_page)) + - le16_to_cpu(sum.ofs_in_node); + + offset = ofs_of_node(node_page); ino = ino_of_node(node_page); f2fs_put_page(node_page, 1); @@ -269,6 +268,9 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, if (IS_ERR(inode)) return PTR_ERR(inode); + bidx = start_bidx_of_node(offset, F2FS_I(inode)) + + le16_to_cpu(sum.ofs_in_node); + truncate_hole(inode, bidx, bidx + 1); iput(inode); return 0; @@ -277,6 +279,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, block_t blkaddr) { + struct f2fs_inode_info *fi = F2FS_I(inode); unsigned int start, end; struct dnode_of_data dn; struct f2fs_summary sum; @@ -284,9 +287,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, int err = 0, recovered = 0; int ilock; - start = start_bidx_of_node(ofs_of_node(page)); + start = start_bidx_of_node(ofs_of_node(page), fi); if (IS_INODE(page)) - end = start + ADDRS_PER_INODE; + end = start + ADDRS_PER_INODE(fi); else end = start + ADDRS_PER_BLOCK; @@ -357,7 +360,7 @@ err: static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head, int type) { - unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver); + unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); struct curseg_info *curseg; struct page *page; int err = 0; @@ -369,7 +372,7 @@ static int recover_data(struct f2fs_sb_info *sbi, /* read node page */ page = alloc_page(GFP_NOFS | __GFP_ZERO); - if (IS_ERR(page)) + if (!page) return -ENOMEM; lock_page(page); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a86d125a9885..09af9c7b0f52 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -117,7 +117,6 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) } mutex_unlock(&dirty_i->seglist_lock); - return; } /* @@ -261,7 +260,6 @@ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, void *addr = curseg->sum_blk; addr += curseg->next_blkoff * sizeof(struct f2fs_summary); memcpy(addr, sum, sizeof(struct f2fs_summary)); - return; } /* @@ -542,12 +540,9 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, { struct curseg_info *curseg = CURSEG_I(sbi, type); - if (force) { + if (force) new_curseg(sbi, type, true); - goto out; - } - - if (type == CURSEG_WARM_NODE) + else if (type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) new_curseg(sbi, type, false); @@ -555,11 +550,9 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, change_curseg(sbi, type, true); else new_curseg(sbi, type, false); -out: #ifdef CONFIG_F2FS_STAT_FS sbi->segment_count[curseg->alloc_type]++; #endif - return; } void allocate_new_segments(struct f2fs_sb_info *sbi) @@ -611,18 +604,12 @@ static void f2fs_end_io_write(struct bio *bio, int err) struct bio *f2fs_bio_alloc(struct block_device *bdev, int npages) { struct bio *bio; - struct bio_private *priv; -retry: - priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); - if (!priv) { - cond_resched(); - goto retry; - } /* No failure on bio allocation */ bio = bio_alloc(GFP_NOIO, npages); bio->bi_bdev = bdev; - bio->bi_private = priv; + bio->bi_private = NULL; + return bio; } @@ -681,8 +668,17 @@ static void submit_write_page(struct f2fs_sb_info *sbi, struct page *page, do_submit_bio(sbi, type, false); alloc_new: if (sbi->bio[type] == NULL) { + struct bio_private *priv; +retry: + priv = kmalloc(sizeof(struct bio_private), GFP_NOFS); + if (!priv) { + cond_resched(); + goto retry; + } + sbi->bio[type] = f2fs_bio_alloc(bdev, max_hw_blocks(sbi)); sbi->bio[type]->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); + sbi->bio[type]->bi_private = priv; /* * The end_io will be assigned at the sumbission phase. * Until then, let bio_add_page() merge consecutive IOs as much @@ -702,6 +698,16 @@ alloc_new: trace_f2fs_submit_write_page(page, blk_addr, type); } +void f2fs_wait_on_page_writeback(struct page *page, + enum page_type type, bool sync) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + if (PageWriteback(page)) { + f2fs_submit_bio(sbi, type, sync); + wait_on_page_writeback(page); + } +} + static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -1179,7 +1185,6 @@ void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE); - return; } int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 062424a0e4c3..bdd10eab8c40 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -142,6 +142,7 @@ struct victim_sel_policy { int alloc_mode; /* LFS or SSR */ int gc_mode; /* GC_CB or GC_GREEDY */ unsigned long *dirty_segmap; /* dirty segment bitmap */ + unsigned int max_search; /* maximum # of segments to search */ unsigned int offset; /* last scanned bitmap offset */ unsigned int ofs_unit; /* bitmap search unit */ unsigned int min_cost; /* minimum cost */ @@ -453,7 +454,8 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) static inline bool need_SSR(struct f2fs_sb_info *sbi) { - return (free_sections(sbi) < overprovision_sections(sbi)); + return ((prefree_segments(sbi) / sbi->segs_per_sec) + + free_sections(sbi) < overprovision_sections(sbi)); } static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) @@ -470,7 +472,7 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) static inline int utilization(struct f2fs_sb_info *sbi) { - return div_u64(valid_user_blocks(sbi) * 100, sbi->user_block_count); + return div_u64((u64)valid_user_blocks(sbi) * 100, sbi->user_block_count); } /* diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 75c7dc363e92..13d0a0fe49dd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -18,20 +18,25 @@ #include #include #include +#include #include #include #include #include +#include #include "f2fs.h" #include "node.h" #include "segment.h" #include "xattr.h" +#include "gc.h" #define CREATE_TRACE_POINTS #include +static struct proc_dir_entry *f2fs_proc_root; static struct kmem_cache *f2fs_inode_cachep; +static struct kset *f2fs_kset; enum { Opt_gc_background, @@ -42,6 +47,7 @@ enum { Opt_noacl, Opt_active_logs, Opt_disable_ext_identify, + Opt_inline_xattr, Opt_err, }; @@ -54,9 +60,117 @@ static match_table_t f2fs_tokens = { {Opt_noacl, "noacl"}, {Opt_active_logs, "active_logs=%u"}, {Opt_disable_ext_identify, "disable_ext_identify"}, + {Opt_inline_xattr, "inline_xattr"}, {Opt_err, NULL}, }; +/* Sysfs support for f2fs */ +struct f2fs_attr { + struct attribute attr; + ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); + ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, + const char *, size_t); + int offset; +}; + +static ssize_t f2fs_sbi_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned int *ui; + + if (!gc_kth) + return -EINVAL; + + ui = (unsigned int *)(((char *)gc_kth) + a->offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + +static ssize_t f2fs_sbi_store(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, + const char *buf, size_t count) +{ + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned long t; + unsigned int *ui; + ssize_t ret; + + if (!gc_kth) + return -EINVAL; + + ui = (unsigned int *)(((char *)gc_kth) + a->offset); + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret < 0) + return ret; + *ui = t; + return count; +} + +static ssize_t f2fs_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->show ? a->show(a, sbi, buf) : 0; +} + +static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->store ? a->store(a, sbi, buf, len) : 0; +} + +static void f2fs_sb_release(struct kobject *kobj) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .offset = offsetof(struct f2fs_gc_kthread, _elname), \ +} + +#define F2FS_RW_ATTR(name, elname) \ + F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname) + +F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); +F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); +F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); +F2FS_RW_ATTR(gc_idle, gc_idle); + +#define ATTR_LIST(name) (&f2fs_attr_##name.attr) +static struct attribute *f2fs_attrs[] = { + ATTR_LIST(gc_min_sleep_time), + ATTR_LIST(gc_max_sleep_time), + ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_idle), + NULL, +}; + +static const struct sysfs_ops f2fs_attr_ops = { + .show = f2fs_attr_show, + .store = f2fs_attr_store, +}; + +static struct kobj_type f2fs_ktype = { + .default_attrs = f2fs_attrs, + .sysfs_ops = &f2fs_attr_ops, + .release = f2fs_sb_release, +}; + void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) { struct va_format vaf; @@ -126,11 +240,18 @@ static int parse_options(struct super_block *sb, char *options) case Opt_nouser_xattr: clear_opt(sbi, XATTR_USER); break; + case Opt_inline_xattr: + set_opt(sbi, INLINE_XATTR); + break; #else case Opt_nouser_xattr: f2fs_msg(sb, KERN_INFO, "nouser_xattr options not supported"); break; + case Opt_inline_xattr: + f2fs_msg(sb, KERN_INFO, + "inline_xattr options not supported"); + break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL case Opt_noacl: @@ -180,6 +301,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) set_inode_flag(fi, FI_NEW_INODE); + if (test_opt(F2FS_SB(sb), INLINE_XATTR)) + set_inode_flag(fi, FI_INLINE_XATTR); + return &fi->vfs_inode; } @@ -205,7 +329,6 @@ static int f2fs_drop_inode(struct inode *inode) static void f2fs_dirty_inode(struct inode *inode, int flags) { set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); - return; } static void f2fs_i_callback(struct rcu_head *head) @@ -223,6 +346,12 @@ static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); + if (sbi->s_proc) { + remove_proc_entry("segment_info", sbi->s_proc); + remove_proc_entry(sb->s_id, f2fs_proc_root); + } + kobject_del(&sbi->s_kobj); + f2fs_destroy_stats(sbi); stop_gc_thread(sbi); @@ -236,6 +365,8 @@ static void f2fs_put_super(struct super_block *sb) destroy_segment_manager(sbi); kfree(sbi->ckpt); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); sb->s_fs_info = NULL; brelse(sbi->raw_super_buf); @@ -325,6 +456,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",user_xattr"); else seq_puts(seq, ",nouser_xattr"); + if (test_opt(sbi, INLINE_XATTR)) + seq_puts(seq, ",inline_xattr"); #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL if (test_opt(sbi, POSIX_ACL)) @@ -340,6 +473,36 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) return 0; } +static int segment_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); + int i; + + for (i = 0; i < total_segs; i++) { + seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); + if (i != 0 && (i % 10) == 0) + seq_puts(seq, "\n"); + else + seq_puts(seq, " "); + } + return 0; +} + +static int segment_info_open_fs(struct inode *inode, struct file *file) +{ + return single_open(file, segment_info_seq_show, PDE_DATA(inode)); +} + +static const struct file_operations f2fs_seq_segment_info_fops = { + .owner = THIS_MODULE, + .open = segment_info_open_fs, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -455,7 +618,7 @@ static const struct export_operations f2fs_export_ops = { static loff_t max_file_size(unsigned bits) { - loff_t result = ADDRS_PER_INODE; + loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS); loff_t leaf_count = ADDRS_PER_BLOCK; /* two direct node blocks */ @@ -766,6 +929,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto fail; + if (f2fs_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); + + if (sbi->s_proc) + proc_create_data("segment_info", S_IRUGO, sbi->s_proc, + &f2fs_seq_segment_info_fops, sb); + if (test_opt(sbi, DISCARD)) { struct request_queue *q = bdev_get_queue(sb->s_bdev); if (!blk_queue_discard(q)) @@ -774,6 +944,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) "the device does not support discard"); } + sbi->s_kobj.kset = f2fs_kset; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, + "%s", sb->s_id); + if (err) + goto fail; + return 0; fail: stop_gc_thread(sbi); @@ -841,29 +1018,49 @@ static int __init init_f2fs_fs(void) goto fail; err = create_node_manager_caches(); if (err) - goto fail; + goto free_inodecache; err = create_gc_caches(); if (err) - goto fail; + goto free_node_manager_caches; err = create_checkpoint_caches(); if (err) - goto fail; + goto free_gc_caches; + f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); + if (!f2fs_kset) { + err = -ENOMEM; + goto free_checkpoint_caches; + } err = register_filesystem(&f2fs_fs_type); if (err) - goto fail; + goto free_kset; f2fs_create_root_stats(); + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + return 0; + +free_kset: + kset_unregister(f2fs_kset); +free_checkpoint_caches: + destroy_checkpoint_caches(); +free_gc_caches: + destroy_gc_caches(); +free_node_manager_caches: + destroy_node_manager_caches(); +free_inodecache: + destroy_inodecache(); fail: return err; } static void __exit exit_f2fs_fs(void) { + remove_proc_entry("fs/f2fs", NULL); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); destroy_checkpoint_caches(); destroy_gc_caches(); destroy_node_manager_caches(); destroy_inodecache(); + kset_unregister(f2fs_kset); } module_init(init_f2fs_fs) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 3ab07ecd86ca..1ac8a5f6e380 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -246,40 +246,170 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) return handler; } -int f2fs_getxattr(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) +static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, + size_t name_len, const char *name) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *entry; - struct page *page; - void *base_addr; - int error = 0, found = 0; - size_t value_len, name_len; - - if (name == NULL) - return -EINVAL; - name_len = strlen(name); - - if (!fi->i_xattr_nid) - return -ENODATA; - - page = get_node_page(sbi, fi->i_xattr_nid); - if (IS_ERR(page)) - return PTR_ERR(page); - base_addr = page_address(page); list_for_each_xattr(entry, base_addr) { if (entry->e_name_index != name_index) continue; if (entry->e_name_len != name_len) continue; - if (!memcmp(entry->e_name, name, name_len)) { - found = 1; + if (!memcmp(entry->e_name, name, name_len)) break; + } + return entry; +} + +static void *read_all_xattrs(struct inode *inode, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct f2fs_xattr_header *header; + size_t size = PAGE_SIZE, inline_size = 0; + void *txattr_addr; + + inline_size = inline_xattr_size(inode); + + txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); + if (!txattr_addr) + return NULL; + + /* read from inline xattr */ + if (inline_size) { + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(ipage); + } else { + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) + goto fail; + inline_addr = inline_xattr_addr(page); + } + memcpy(txattr_addr, inline_addr, inline_size); + f2fs_put_page(page, 1); + } + + /* read from xattr node block */ + if (F2FS_I(inode)->i_xattr_nid) { + struct page *xpage; + void *xattr_addr; + + /* The inode already has an extended attribute block. */ + xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + if (IS_ERR(xpage)) + goto fail; + + xattr_addr = page_address(xpage); + memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE); + f2fs_put_page(xpage, 1); + } + + header = XATTR_HDR(txattr_addr); + + /* never been allocated xattrs */ + if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) { + header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC); + header->h_refcount = cpu_to_le32(1); + } + return txattr_addr; +fail: + kzfree(txattr_addr); + return NULL; +} + +static inline int write_all_xattrs(struct inode *inode, __u32 hsize, + void *txattr_addr, struct page *ipage) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + size_t inline_size = 0; + void *xattr_addr; + struct page *xpage; + nid_t new_nid = 0; + int err; + + inline_size = inline_xattr_size(inode); + + if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) + if (!alloc_nid(sbi, &new_nid)) + return -ENOSPC; + + /* write to inline xattr */ + if (inline_size) { + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(ipage); + } else { + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(page); + } + inline_addr = inline_xattr_addr(page); + } + memcpy(inline_addr, txattr_addr, inline_size); + f2fs_put_page(page, 1); + + /* no need to use xattr node block */ + if (hsize <= inline_size) { + err = truncate_xattr_node(inode, ipage); + alloc_nid_failed(sbi, new_nid); + return err; } } - if (!found) { + + /* write to xattr node block */ + if (F2FS_I(inode)->i_xattr_nid) { + xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + if (IS_ERR(xpage)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(xpage); + } + BUG_ON(new_nid); + } else { + struct dnode_of_data dn; + set_new_dnode(&dn, inode, NULL, NULL, new_nid); + xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); + if (IS_ERR(xpage)) { + alloc_nid_failed(sbi, new_nid); + return PTR_ERR(xpage); + } + alloc_nid_done(sbi, new_nid); + } + + xattr_addr = page_address(xpage); + memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE - + sizeof(struct node_footer)); + set_page_dirty(xpage); + f2fs_put_page(xpage, 1); + + /* need to checkpoint during fsync */ + F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); + return 0; +} + +int f2fs_getxattr(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size) +{ + struct f2fs_xattr_entry *entry; + void *base_addr; + int error = 0; + size_t value_len, name_len; + + if (name == NULL) + return -EINVAL; + name_len = strlen(name); + + base_addr = read_all_xattrs(inode, NULL); + if (!base_addr) + return -ENOMEM; + + entry = __find_xattr(base_addr, name_index, name_len, name); + if (IS_XATTR_LAST_ENTRY(entry)) { error = -ENODATA; goto cleanup; } @@ -298,28 +428,21 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name, error = value_len; cleanup: - f2fs_put_page(page, 1); + kzfree(base_addr); return error; } ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { struct inode *inode = dentry->d_inode; - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *entry; - struct page *page; void *base_addr; int error = 0; size_t rest = buffer_size; - if (!fi->i_xattr_nid) - return 0; - - page = get_node_page(sbi, fi->i_xattr_nid); - if (IS_ERR(page)) - return PTR_ERR(page); - base_addr = page_address(page); + base_addr = read_all_xattrs(inode, NULL); + if (!base_addr) + return -ENOMEM; list_for_each_xattr(entry, base_addr) { const struct xattr_handler *handler = @@ -342,7 +465,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) } error = buffer_size - rest; cleanup: - f2fs_put_page(page, 1); + kzfree(base_addr); return error; } @@ -351,14 +474,13 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_xattr_header *header = NULL; struct f2fs_xattr_entry *here, *last; - struct page *page; void *base_addr; - int error, found, free, newsize; + int found, newsize; size_t name_len; - char *pval; int ilock; + __u32 new_hsize; + int error = -ENOMEM; if (name == NULL) return -EINVAL; @@ -368,67 +490,21 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, name_len = strlen(name); - if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN) + if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) return -ERANGE; f2fs_balance_fs(sbi); ilock = mutex_lock_op(sbi); - if (!fi->i_xattr_nid) { - /* Allocate new attribute block */ - struct dnode_of_data dn; - - if (!alloc_nid(sbi, &fi->i_xattr_nid)) { - error = -ENOSPC; - goto exit; - } - set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid); - mark_inode_dirty(inode); - - page = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); - if (IS_ERR(page)) { - alloc_nid_failed(sbi, fi->i_xattr_nid); - fi->i_xattr_nid = 0; - error = PTR_ERR(page); - goto exit; - } - - alloc_nid_done(sbi, fi->i_xattr_nid); - base_addr = page_address(page); - header = XATTR_HDR(base_addr); - header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC); - header->h_refcount = cpu_to_le32(1); - } else { - /* The inode already has an extended attribute block. */ - page = get_node_page(sbi, fi->i_xattr_nid); - if (IS_ERR(page)) { - error = PTR_ERR(page); - goto exit; - } - - base_addr = page_address(page); - header = XATTR_HDR(base_addr); - } - - if (le32_to_cpu(header->h_magic) != F2FS_XATTR_MAGIC) { - error = -EIO; - goto cleanup; - } + base_addr = read_all_xattrs(inode, ipage); + if (!base_addr) + goto exit; /* find entry with wanted name. */ - found = 0; - list_for_each_xattr(here, base_addr) { - if (here->e_name_index != name_index) - continue; - if (here->e_name_len != name_len) - continue; - if (!memcmp(here->e_name, name, name_len)) { - found = 1; - break; - } - } + here = __find_xattr(base_addr, name_index, name_len, name); + found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; last = here; while (!IS_XATTR_LAST_ENTRY(last)) @@ -439,22 +515,25 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, /* 1. Check space */ if (value) { - /* If value is NULL, it is remove operation. + int free; + /* + * If value is NULL, it is remove operation. * In case of update operation, we caculate free. */ - free = MIN_OFFSET - ((char *)last - (char *)header); + free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); if (found) free = free - ENTRY_SIZE(here); if (free < newsize) { error = -ENOSPC; - goto cleanup; + goto exit; } } /* 2. Remove old entry */ if (found) { - /* If entry is found, remove old entry. + /* + * If entry is found, remove old entry. * If not found, remove operation is not needed. */ struct f2fs_xattr_entry *next = XATTR_NEXT_ENTRY(here); @@ -465,10 +544,15 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, memset(last, 0, oldsize); } + new_hsize = (char *)last - (char *)base_addr; + /* 3. Write new entry */ if (value) { - /* Before we come here, old entry is removed. - * We just write new entry. */ + char *pval; + /* + * Before we come here, old entry is removed. + * We just write new entry. + */ memset(last, 0, newsize); last->e_name_index = name_index; last->e_name_len = name_len; @@ -476,26 +560,25 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, pval = last->e_name + name_len; memcpy(pval, value, value_len); last->e_value_size = cpu_to_le16(value_len); + new_hsize += newsize; } - set_page_dirty(page); - f2fs_put_page(page, 1); + error = write_all_xattrs(inode, new_hsize, base_addr, ipage); + if (error) + goto exit; if (is_inode_flag_set(fi, FI_ACL_MODE)) { inode->i_mode = fi->i_acl_mode; inode->i_ctime = CURRENT_TIME; clear_inode_flag(fi, FI_ACL_MODE); } + if (ipage) update_inode(inode, ipage); else update_inode_page(inode); - mutex_unlock_op(sbi, ilock); - - return 0; -cleanup: - f2fs_put_page(page, 1); exit: mutex_unlock_op(sbi, ilock); + kzfree(base_addr); return error; } diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 3c0817bef25d..02a08fb88a15 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -51,7 +51,7 @@ struct f2fs_xattr_entry { #define XATTR_HDR(ptr) ((struct f2fs_xattr_header *)(ptr)) #define XATTR_ENTRY(ptr) ((struct f2fs_xattr_entry *)(ptr)) -#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr)+1)) +#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1)) #define XATTR_ROUND (3) #define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND) @@ -69,17 +69,16 @@ struct f2fs_xattr_entry { !IS_XATTR_LAST_ENTRY(entry);\ entry = XATTR_NEXT_ENTRY(entry)) +#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \ + sizeof(struct node_footer) - sizeof(__u32)) -#define MIN_OFFSET XATTR_ALIGN(PAGE_SIZE - \ - sizeof(struct node_footer) - \ - sizeof(__u32)) - -#define MAX_VALUE_LEN (MIN_OFFSET - sizeof(struct f2fs_xattr_header) - \ - sizeof(struct f2fs_xattr_entry)) +#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \ + sizeof(struct f2fs_xattr_header) - \ + sizeof(struct f2fs_xattr_entry)) /* * On-disk structure of f2fs_xattr - * We use only 1 block for xattr. + * We use inline xattrs space + 1 block for xattr. * * +--------------------+ * | f2fs_xattr_header | diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 383d5e39b280..bb942f6d5702 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -140,14 +140,24 @@ struct f2fs_extent { } __packed; #define F2FS_NAME_LEN 255 -#define ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ -#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ -#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ +#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ +#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ +#define ADDRS_PER_INODE(fi) addrs_per_inode(fi) +#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ +#define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ + +#define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) +#define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) +#define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3) +#define NODE_IND2_BLOCK (DEF_ADDRS_PER_INODE + 4) +#define NODE_DIND_BLOCK (DEF_ADDRS_PER_INODE + 5) + +#define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ struct f2fs_inode { __le16 i_mode; /* file mode */ __u8 i_advise; /* file hints */ - __u8 i_reserved; /* reserved */ + __u8 i_inline; /* file inline flags */ __le32 i_uid; /* user ID */ __le32 i_gid; /* group ID */ __le32 i_links; /* links count */ @@ -170,7 +180,7 @@ struct f2fs_inode { struct f2fs_extent i_ext; /* caching a largest extent */ - __le32 i_addr[ADDRS_PER_INODE]; /* Pointers to data blocks */ + __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ __le32 i_nid[5]; /* direct(2), indirect(2), double_indirect(1) node id */