From 3974320ca6aa68d479051f208d5c95afd1e47a4c Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 16 Feb 2017 10:27:16 -0500 Subject: [PATCH] GFS2: Implement iomap for block_map This patch implements iomap for block mapping, and switches the block_map function to use it under the covers. The additional IOMAP_F_BOUNDARY iomap flag indicates when iomap has reached a "metadata boundary" and fetching the next mapping is likely to incur an additional I/O. This flag is used for setting the bh buffer boundary flag. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 335 +++++++++++++++++++++++++++++------------- fs/gfs2/bmap.h | 4 + fs/gfs2/trace_gfs2.h | 65 ++++++++ include/linux/iomap.h | 3 +- 4 files changed, 307 insertions(+), 100 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 03badc8417d7..d5f0d96169c5 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -505,10 +506,8 @@ static inline unsigned int hptrs(struct gfs2_sbd *sdp, const unsigned int hgt) * Returns: errno on error */ -static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, - bool zero_new, struct metapath *mp, - const size_t maxlen, sector_t *dblock, - unsigned *dblks) +static int gfs2_iomap_alloc(struct inode *inode, struct iomap *iomap, + unsigned flags, struct metapath *mp) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -516,36 +515,37 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, struct buffer_head *dibh = mp->mp_bh[0]; u64 bn; unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; + unsigned dblks = 0; unsigned ptrs_per_blk; const unsigned end_of_metadata = mp->mp_fheight - 1; int ret; - int eob = 0; enum alloc_state state; __be64 *ptr; __be64 zero_bn = 0; + size_t maxlen = iomap->length >> inode->i_blkbits; BUG_ON(mp->mp_aheight < 1); BUG_ON(dibh == NULL); - *dblock = 0; - *dblks = 0; gfs2_trans_add_meta(ip->i_gl, dibh); if (mp->mp_fheight == mp->mp_aheight) { struct buffer_head *bh; + int eob; + /* Bottom indirect block exists, find unalloced extent size */ ptr = metapointer(end_of_metadata, mp); bh = mp->mp_bh[end_of_metadata]; - *dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, - maxlen, &eob); - BUG_ON(*dblks < 1); + dblks = gfs2_extent_length(bh->b_data, bh->b_size, ptr, + maxlen, &eob); + BUG_ON(dblks < 1); state = ALLOC_DATA; } else { /* Need to allocate indirect blocks */ ptrs_per_blk = mp->mp_fheight > 1 ? sdp->sd_inptrs : sdp->sd_diptrs; - *dblks = min(maxlen, (size_t)(ptrs_per_blk - - mp->mp_list[end_of_metadata])); + dblks = min(maxlen, (size_t)(ptrs_per_blk - + mp->mp_list[end_of_metadata])); if (mp->mp_fheight == ip->i_height) { /* Writing into existing tree, extend tree down */ iblks = mp->mp_fheight - mp->mp_aheight; @@ -561,7 +561,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, /* start of the second part of the function (state machine) */ - blks = *dblks + iblks; + blks = dblks + iblks; i = mp->mp_aheight; do { int error; @@ -618,32 +618,219 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, break; /* Tree complete, adding data blocks */ case ALLOC_DATA: - BUG_ON(n > *dblks); + BUG_ON(n > dblks); BUG_ON(mp->mp_bh[end_of_metadata] == NULL); gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]); - *dblks = n; + dblks = n; ptr = metapointer(end_of_metadata, mp); - *dblock = bn; + iomap->addr = bn << inode->i_blkbits; + iomap->flags |= IOMAP_F_NEW; while (n-- > 0) *ptr++ = cpu_to_be64(bn++); - if (zero_new) { - ret = sb_issue_zeroout(sb, *dblock, *dblks, - GFP_NOFS); + if (flags & IOMAP_ZERO) { + ret = sb_issue_zeroout(sb, iomap->addr >> inode->i_blkbits, + dblks, GFP_NOFS); if (ret) { fs_err(sdp, "Failed to zero data buffers\n"); + flags &= ~IOMAP_ZERO; } } break; } - } while ((state != ALLOC_DATA) || !(*dblock)); + } while (iomap->addr == IOMAP_NULL_ADDR); + iomap->length = (u64)dblks << inode->i_blkbits; ip->i_height = mp->mp_fheight; gfs2_add_inode_blocks(&ip->i_inode, alloced); gfs2_dinode_out(ip, mp->mp_bh[0]->b_data); return 0; } +/** + * hole_size - figure out the size of a hole + * @inode: The inode + * @lblock: The logical starting block number + * @mp: The metapath + * + * Returns: The hole size in bytes + * + */ +static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct metapath mp_eof; + u64 factor = 1; + int hgt; + u64 holesz = 0; + const __be64 *first, *end, *ptr; + const struct buffer_head *bh; + u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits; + int zeroptrs; + bool done = false; + + /* Get another metapath, to the very last byte */ + find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height); + for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) { + bh = mp->mp_bh[hgt]; + if (bh) { + zeroptrs = 0; + first = metapointer(hgt, mp); + end = (const __be64 *)(bh->b_data + bh->b_size); + + for (ptr = first; ptr < end; ptr++) { + if (*ptr) { + done = true; + break; + } else { + zeroptrs++; + } + } + } else { + zeroptrs = sdp->sd_inptrs; + } + if (factor * zeroptrs >= lblock_stop - lblock + 1) { + holesz = lblock_stop - lblock + 1; + break; + } + holesz += factor * zeroptrs; + + factor *= sdp->sd_inptrs; + if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1])) + (mp->mp_list[hgt - 1])++; + } + return holesz << inode->i_blkbits; +} + +static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap) +{ + struct gfs2_inode *ip = GFS2_I(inode); + + iomap->addr = (ip->i_no_addr << inode->i_blkbits) + + sizeof(struct gfs2_dinode); + iomap->offset = 0; + iomap->length = i_size_read(inode); + iomap->type = IOMAP_MAPPED; + iomap->flags = IOMAP_F_DATA_INLINE; +} + +/** + * gfs2_iomap_begin - Map blocks from an inode to disk blocks + * @inode: The inode + * @pos: Starting position in bytes + * @length: Length to map, in bytes + * @flags: iomap flags + * @iomap: The iomap structure + * + * Returns: errno + */ +int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, + unsigned flags, struct iomap *iomap) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct metapath mp = { .mp_aheight = 1, }; + unsigned int factor = sdp->sd_sb.sb_bsize; + const u64 *arr = sdp->sd_heightsize; + __be64 *ptr; + sector_t lblock; + sector_t lend; + int ret; + int eob; + unsigned int len; + struct buffer_head *bh; + u8 height; + + trace_gfs2_iomap_start(ip, pos, length, flags); + if (!length) { + ret = -EINVAL; + goto out; + } + + if ((flags & IOMAP_REPORT) && gfs2_is_stuffed(ip)) { + gfs2_stuffed_iomap(inode, iomap); + if (pos >= iomap->length) + return -ENOENT; + ret = 0; + goto out; + } + + lblock = pos >> inode->i_blkbits; + lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits; + + iomap->offset = lblock << inode->i_blkbits; + iomap->addr = IOMAP_NULL_ADDR; + iomap->type = IOMAP_HOLE; + iomap->length = (u64)(lend - lblock) << inode->i_blkbits; + iomap->flags = IOMAP_F_MERGED; + bmap_lock(ip, 0); + + /* + * Directory data blocks have a struct gfs2_meta_header header, so the + * remaining size is smaller than the filesystem block size. Logical + * block numbers for directories are in units of this remaining size! + */ + if (gfs2_is_dir(ip)) { + factor = sdp->sd_jbsize; + arr = sdp->sd_jheightsize; + } + + ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]); + if (ret) + goto out_release; + + height = ip->i_height; + while ((lblock + 1) * factor > arr[height]) + height++; + find_metapath(sdp, lblock, &mp, height); + if (height > ip->i_height || gfs2_is_stuffed(ip)) + goto do_alloc; + + ret = lookup_metapath(ip, &mp); + if (ret < 0) + goto out_release; + + if (mp.mp_aheight != ip->i_height) + goto do_alloc; + + ptr = metapointer(ip->i_height - 1, &mp); + if (*ptr == 0) + goto do_alloc; + + iomap->type = IOMAP_MAPPED; + iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits; + + bh = mp.mp_bh[ip->i_height - 1]; + len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob); + if (eob) + iomap->flags |= IOMAP_F_BOUNDARY; + iomap->length = (u64)len << inode->i_blkbits; + + ret = 0; + +out_release: + release_metapath(&mp); + bmap_unlock(ip, 0); +out: + trace_gfs2_iomap_end(ip, iomap, ret); + return ret; + +do_alloc: + if (!(flags & IOMAP_WRITE)) { + if (pos >= i_size_read(inode)) { + ret = -ENOENT; + goto out_release; + } + ret = 0; + iomap->length = hole_size(inode, lblock, &mp); + goto out_release; + } + + ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); + goto out_release; +} + /** * gfs2_block_map - Map a block from an inode to a disk block * @inode: The inode @@ -662,93 +849,43 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh_map, int create) { struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - unsigned int factor = sdp->sd_sb.sb_bsize; - const size_t maxlen = bh_map->b_size >> inode->i_blkbits; - const u64 *arr = sdp->sd_heightsize; - __be64 *ptr; - u64 size; - struct metapath mp; - int ret; - int eob; - unsigned int len; - struct buffer_head *bh; - u8 height; - bool zero_new = false; - sector_t dblock = 0; - unsigned dblks; + struct iomap iomap; + int ret, flags = 0; - BUG_ON(maxlen == 0); - - memset(&mp, 0, sizeof(mp)); - bmap_lock(ip, create); clear_buffer_mapped(bh_map); clear_buffer_new(bh_map); clear_buffer_boundary(bh_map); trace_gfs2_bmap(ip, bh_map, lblock, create, 1); - /* - * Directory data blocks have a struct gfs2_meta_header header, so the - * remaining size is smaller than the filesystem block size. Logical - * block numbers for directories are in units of this remaining size! - */ - if (gfs2_is_dir(ip)) { - factor = sdp->sd_jbsize; - arr = sdp->sd_jheightsize; - } - - ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]); - if (ret) - goto out; - - height = ip->i_height; - size = (lblock + 1) * factor; - while (size > arr[height]) - height++; - find_metapath(sdp, lblock, &mp, height); - mp.mp_aheight = 1; - if (height > ip->i_height || gfs2_is_stuffed(ip)) - goto do_alloc; - ret = lookup_metapath(ip, &mp); - if (ret < 0) - goto out; - if (mp.mp_aheight != ip->i_height) - goto do_alloc; - ptr = metapointer(ip->i_height - 1, &mp); - if (*ptr == 0) - goto do_alloc; - map_bh(bh_map, inode->i_sb, be64_to_cpu(*ptr)); - bh = mp.mp_bh[ip->i_height - 1]; - len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, maxlen, &eob); - bh_map->b_size = (len << inode->i_blkbits); - if (eob) - set_buffer_boundary(bh_map); - ret = 0; -out: - release_metapath(&mp); - trace_gfs2_bmap(ip, bh_map, lblock, create, ret); - bmap_unlock(ip, create); - return ret; - -do_alloc: - /* All allocations are done here, firstly check create flag */ - if (!create) { - BUG_ON(gfs2_is_stuffed(ip)); - ret = 0; - goto out; - } - - /* At this point ret is the tree depth of already allocated blocks */ + if (create) + flags |= IOMAP_WRITE; if (buffer_zeronew(bh_map)) - zero_new = true; - ret = gfs2_bmap_alloc(inode, lblock, zero_new, &mp, maxlen, &dblock, - &dblks); - if (ret == 0) { - map_bh(bh_map, inode->i_sb, dblock); - bh_map->b_size = dblks << inode->i_blkbits; - set_buffer_new(bh_map); + flags |= IOMAP_ZERO; + ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits, + bh_map->b_size, flags, &iomap); + if (ret) { + if (!create && ret == -ENOENT) { + /* Return unmapped buffer beyond the end of file. */ + ret = 0; + } + goto out; } - goto out; + + if (iomap.length > bh_map->b_size) { + iomap.length = bh_map->b_size; + iomap.flags &= ~IOMAP_F_BOUNDARY; + } + if (iomap.addr != IOMAP_NULL_ADDR) + map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits); + bh_map->b_size = iomap.length; + if (iomap.flags & IOMAP_F_BOUNDARY) + set_buffer_boundary(bh_map); + if (iomap.flags & IOMAP_F_NEW) + set_buffer_new(bh_map); + +out: + trace_gfs2_bmap(ip, bh_map, lblock, create, ret); + return ret; } /* diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index 81ded5e2aaa2..443cc182cf18 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h @@ -10,6 +10,8 @@ #ifndef __BMAP_DOT_H__ #define __BMAP_DOT_H__ +#include + #include "inode.h" struct inode; @@ -47,6 +49,8 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip, extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); extern int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create); +extern int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, + unsigned flags, struct iomap *iomap); extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen); extern int gfs2_setattr_size(struct inode *inode, u64 size); diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 49ac55da4e33..3c91ae3cf0b2 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -12,6 +12,7 @@ #include #include #include +#include #include "incore.h" #include "glock.h" #include "rgrp.h" @@ -469,6 +470,70 @@ TRACE_EVENT(gfs2_bmap, __entry->errno) ); +TRACE_EVENT(gfs2_iomap_start, + + TP_PROTO(const struct gfs2_inode *ip, loff_t pos, ssize_t length, + u16 flags), + + TP_ARGS(ip, pos, length, flags), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, inum ) + __field( loff_t, pos ) + __field( ssize_t, length ) + __field( u16, flags ) + ), + + TP_fast_assign( + __entry->dev = ip->i_gl->gl_name.ln_sbd->sd_vfs->s_dev; + __entry->inum = ip->i_no_addr; + __entry->pos = pos; + __entry->length = length; + __entry->flags = flags; + ), + + TP_printk("%u,%u bmap %llu iomap start %llu/%lu flags:%08x", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->inum, + (unsigned long long)__entry->pos, + (unsigned long)__entry->length, (u16)__entry->flags) +); + +TRACE_EVENT(gfs2_iomap_end, + + TP_PROTO(const struct gfs2_inode *ip, struct iomap *iomap, int ret), + + TP_ARGS(ip, iomap, ret), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( u64, inum ) + __field( loff_t, offset ) + __field( ssize_t, length ) + __field( u16, flags ) + __field( u16, type ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->dev = ip->i_gl->gl_name.ln_sbd->sd_vfs->s_dev; + __entry->inum = ip->i_no_addr; + __entry->offset = iomap->offset; + __entry->length = iomap->length; + __entry->flags = iomap->flags; + __entry->type = iomap->type; + __entry->ret = ret; + ), + + TP_printk("%u,%u bmap %llu iomap end %llu/%lu ty:%d flags:%08x rc:%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->inum, + (unsigned long long)__entry->offset, + (unsigned long)__entry->length, (u16)__entry->type, + (u16)__entry->flags, __entry->ret) +); + /* Keep track of blocks as they are allocated/freed */ TRACE_EVENT(gfs2_block_alloc, diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 2b0790dbd6ea..a61be86710b5 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -21,7 +21,8 @@ struct vm_fault; /* * Flags for all iomap mappings: */ -#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ +#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ +#define IOMAP_F_BOUNDARY 0x02 /* mapping ends at metadata boundary */ /* * Flags that only need to be reported for IOMAP_REPORT requests: