From 3fefdeee92686995ff03e847cbd7bf5ebcd85ff8 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 13 Nov 2013 14:53:45 -0600 Subject: [PATCH 01/51] xfs: simplify xfs_setsize_buftarg callchain; remove unused arg The "verbose" argument to xfs_setsize_buftarg_flags() has been unused since: ffe37436 xfs: stop using the page cache to back the buffer cache Remove it, and fold the function into xfs_setsize_buftarg() now that there's no need for different types of callers. Fix inconsistent comment spacing while we're at it. Signed-off-by: Eric Sandeen Reviewed-by: Brian Foster Signed-off-by: Ben Myers --- fs/xfs/xfs_buf.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index c7f0b77dcb00..ce01c1a17cc1 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1602,12 +1602,11 @@ xfs_free_buftarg( kmem_free(btp); } -STATIC int -xfs_setsize_buftarg_flags( +int +xfs_setsize_buftarg( xfs_buftarg_t *btp, unsigned int blocksize, - unsigned int sectorsize, - int verbose) + unsigned int sectorsize) { btp->bt_bsize = blocksize; btp->bt_sshift = ffs(sectorsize) - 1; @@ -1628,26 +1627,17 @@ xfs_setsize_buftarg_flags( } /* - * When allocating the initial buffer target we have not yet - * read in the superblock, so don't know what sized sectors - * are being used at this early stage. Play safe. + * When allocating the initial buffer target we have not yet + * read in the superblock, so don't know what sized sectors + * are being used at this early stage. Play safe. 
*/ STATIC int xfs_setsize_buftarg_early( xfs_buftarg_t *btp, struct block_device *bdev) { - return xfs_setsize_buftarg_flags(btp, - PAGE_SIZE, bdev_logical_block_size(bdev), 0); -} - -int -xfs_setsize_buftarg( - xfs_buftarg_t *btp, - unsigned int blocksize, - unsigned int sectorsize) -{ - return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); + return xfs_setsize_buftarg(btp, PAGE_SIZE, + bdev_logical_block_size(bdev)); } xfs_buftarg_t * From f23007784570278ca5963c35d5b3847d710ed695 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 15 Nov 2013 09:38:48 -0800 Subject: [PATCH 02/51] xfs: remove unused FI_ flags Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Signed-off-by: Ben Myers --- fs/xfs/xfs_vnode.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h index 3e8e797c6d11..e8a77383c0d5 100644 --- a/fs/xfs/xfs_vnode.h +++ b/fs/xfs/xfs_vnode.h @@ -34,15 +34,6 @@ struct attrlist_cursor_kern; { IO_ISDIRECT, "DIRECT" }, \ { IO_INVIS, "INVIS"} -/* - * Flush/Invalidate options for vop_toss/flush/flushinval_pages. - */ -#define FI_NONE 0 /* none */ -#define FI_REMAPF 1 /* Do a remapf prior to the operation */ -#define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation. - Prevent VM access to the pages until - the operation completes. */ - /* * Some useful predicates. */ From 071c529eb672648ee8ca3f90944bcbcc730b4c06 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 31 Oct 2013 21:00:10 +0300 Subject: [PATCH 03/51] xfs: underflow bug in xfs_attrlist_by_handle() If we allocate less than sizeof(struct attrlist) then we end up corrupting memory or doing a ZERO_PTR_SIZE dereference. This can only be triggered with CAP_SYS_ADMIN. 
Reported-by: Nico Golde Reported-by: Fabian Yamaguchi Signed-off-by: Dan Carpenter Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_ioctl.c | 3 ++- fs/xfs/xfs_ioctl32.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 4d613401a5e0..33ad9a77791f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -442,7 +442,8 @@ xfs_attrlist_by_handle( return -XFS_ERROR(EPERM); if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) return -XFS_ERROR(EFAULT); - if (al_hreq.buflen > XATTR_LIST_MAX) + if (al_hreq.buflen < sizeof(struct attrlist) || + al_hreq.buflen > XATTR_LIST_MAX) return -XFS_ERROR(EINVAL); /* diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index e8fb1231db81..a7992f8de9d3 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -356,7 +356,8 @@ xfs_compat_attrlist_by_handle( if (copy_from_user(&al_hreq, arg, sizeof(compat_xfs_fsop_attrlist_handlereq_t))) return -XFS_ERROR(EFAULT); - if (al_hreq.buflen > XATTR_LIST_MAX) + if (al_hreq.buflen < sizeof(struct attrlist) || + al_hreq.buflen > XATTR_LIST_MAX) return -XFS_ERROR(EINVAL); /* From 10f73d27c8e977fb6fbd6058517069be830c6c9a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Nov 2013 03:45:36 -0800 Subject: [PATCH 04/51] xfs: fix the comment explaining xfs_trans_dqlockedjoin Signed-off-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Ben Myers --- fs/xfs/xfs_trans_dquot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index cd2a10e15d3a..41172861e857 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -295,8 +295,8 @@ xfs_trans_mod_dquot( /* * Given an array of dqtrx structures, lock all the dquots associated and join * them to the transaction, provided they have been modified. 
We know that the - * highest number of dquots of one type - usr, grp OR prj - involved in a - * transaction is 2 so we don't need to make this very generic. + * highest number of dquots of one type - usr, grp and prj - involved in a + * transaction is 3 so we don't need to make this very generic. */ STATIC void xfs_trans_dqlockedjoin( From f9fd0135610084abef6867d984e9951c3099950d Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 20 Nov 2013 16:08:53 +0800 Subject: [PATCH 05/51] xfs: don't perform discard if the given range length is less than block size For a discard operation, we should return EINVAL if the given range length is less than a block size, otherwise it will go through the file system to discard data blocks as the end range might be evaluated to -1, e.g., # fstrim -v -o 0 -l 100 /xfs7 /xfs7: 9811378176 bytes were trimmed This issue can be triggered via xfstests/generic/288. Also, getting the request queue pointer via bdev_get_queue() instead of the hard-coded pointer dereference seems like a good idea. Signed-off-by: Jie Liu Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_discard.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 8367d6dc18c9..4f11ef011139 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -157,7 +157,7 @@ xfs_ioc_trim( struct xfs_mount *mp, struct fstrim_range __user *urange) { - struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; + struct request_queue *q = bdev_get_queue(mp->m_ddev_targp->bt_bdev); unsigned int granularity = q->limits.discard_granularity; struct fstrim_range range; xfs_daddr_t start, end, minlen; @@ -180,7 +180,8 @@ xfs_ioc_trim( * matter as trimming blocks is an advisory interface. 
*/ if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || - range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp))) + range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) || + range.len < mp->m_sb.sb_blocksize) return -XFS_ERROR(EINVAL); start = BTOBB(range.start); From b7d961b35b3ab69609aeea93f870269cb6e7ba4d Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Nov 2013 15:41:06 +1100 Subject: [PATCH 06/51] xfs: growfs overruns AGFL buffer on V4 filesystems This loop in xfs_growfs_data_private() is incorrect for V4 superblocks filesystems: for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); For V4 filesystems, we don't have a agfl header structure, and so XFS_AGFL_SIZE() returns an entire sector's worth of entries, which we then index from an offset into the sector. Hence: buffer overrun. This problem was introduced in 3.10 by commit 77c95bba ("xfs: add CRC checks to the AGFL") which changed the AGFL structure but failed to update the growfs code to handle the different structures. Fix it by using the correct offset into the buffer for both V4 and V5 filesystems. 
Cc: Signed-off-by: Dave Chinner Reviewed-by: Jie Liu Signed-off-by: Ben Myers --- fs/xfs/xfs_fsops.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index a6e54b3319bd..02fb943cbf22 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -220,6 +220,8 @@ xfs_growfs_data_private( */ nfree = 0; for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { + __be32 *agfl_bno; + /* * AG freespace header block */ @@ -279,8 +281,10 @@ xfs_growfs_data_private( agfl->agfl_seqno = cpu_to_be32(agno); uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid); } + + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp); for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) - agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); + agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); error = xfs_bwrite(bp); xfs_buf_relse(bp); From dff6efc326a4d5f305797d4a6bba14f374fdd633 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Nov 2013 07:17:07 -0800 Subject: [PATCH 07/51] fs: fix iversion handling Currently notify_change directly updates i_version for size updates, which not only is counter to how all other fields are updated through struct iattr, but also breaks XFS, which needs inode updates to happen under its own lock, and synchronized to the structure that gets written to the log. Remove the update in the common code, and add it to btrfs and ext4; XFS already does a proper update internally and currently gets a double update with the existing code. IMHO this is 3.13 and -stable material and should go in through the XFS tree. 
Signed-off-by: Christoph Hellwig Reviewed-by: Andreas Dilger Acked-by: Jan Kara Reviewed-by: Dave Chinner Signed-off-by: Chris Mason Signed-off-by: Ben Myers --- fs/attr.c | 5 ----- fs/btrfs/inode.c | 8 ++++++-- fs/ext4/inode.c | 4 ++++ 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/attr.c b/fs/attr.c index 267968d94673..5d4e59d56e85 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -202,11 +202,6 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de return -EPERM; } - if ((ia_valid & ATTR_SIZE) && IS_I_VERSION(inode)) { - if (attr->ia_size != inode->i_size) - inode_inc_iversion(inode); - } - if ((ia_valid & ATTR_MODE)) { umode_t amode = attr->ia_mode; /* Flag setting protected by i_mutex */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f1a77449d032..471a4f7f4044 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4354,8 +4354,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) * these flags set. For all other operations the VFS set these flags * explicitly if it wants a timestamp update. 
*/ - if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) - inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); + if (newsize != oldsize) { + inode_inc_iversion(inode); + if (!(mask & (ATTR_CTIME | ATTR_MTIME))) + inode->i_ctime = inode->i_mtime = + current_fs_time(inode->i_sb); + } if (newsize > oldsize) { truncate_pagecache(inode, newsize); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 075763474118..7f0e15ebacd9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4598,6 +4598,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_size > sbi->s_bitmap_maxbytes) return -EFBIG; } + + if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size) + inode_inc_iversion(inode); + if (S_ISREG(inode->i_mode) && (attr->ia_size < inode->i_size)) { if (ext4_should_order_data(inode)) { From 2a84108fe275f95fbe838b1c92b7c45258dcae5c Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Wed, 2 Oct 2013 07:51:12 -0500 Subject: [PATCH 08/51] xfs: free the list of recovery items on error Recovery builds a list of items on the transaction's r_itemq head. Normally these items are committed and freed. But in the event of a recovery error, these allocations are leaked. If the error occurs during item reordering, then reconstruct the r_itemq list before deleting the list to avoid leaking the entries that were on one of the temporary lists. 
Signed-off-by: Mark Tinguely Reviewed-by: Ben Myers Signed-off-by: Ben Myers --- fs/xfs/xfs_log_recover.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b6b669df40f3..07ab52ca8aba 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1651,6 +1651,7 @@ xlog_recover_reorder_trans( int pass) { xlog_recover_item_t *item, *n; + int error = 0; LIST_HEAD(sort_list); LIST_HEAD(cancel_list); LIST_HEAD(buffer_list); @@ -1692,9 +1693,17 @@ xlog_recover_reorder_trans( "%s: unrecognized type of log operation", __func__); ASSERT(0); - return XFS_ERROR(EIO); + /* + * return the remaining items back to the transaction + * item list so they can be freed in caller. + */ + if (!list_empty(&sort_list)) + list_splice_init(&sort_list, &trans->r_itemq); + error = XFS_ERROR(EIO); + goto out; } } +out: ASSERT(list_empty(&sort_list)); if (!list_empty(&buffer_list)) list_splice(&buffer_list, &trans->r_itemq); @@ -1704,7 +1713,7 @@ xlog_recover_reorder_trans( list_splice_tail(&inode_buffer_list, &trans->r_itemq); if (!list_empty(&cancel_list)) list_splice_tail(&cancel_list, &trans->r_itemq); - return 0; + return error; } /* @@ -3608,8 +3617,10 @@ xlog_recover_process_data( error = XFS_ERROR(EIO); break; } - if (error) + if (error) { + xlog_recover_free_trans(trans); return error; + } } dp += be32_to_cpu(ohead->oh_len); num_logops--; From ef701600fd26cace9d513ee174688a2b83832126 Mon Sep 17 00:00:00 2001 From: Mark Tinguely Date: Sat, 5 Oct 2013 21:48:25 -0500 Subject: [PATCH 09/51] xfs: fix memory leak in xfs_dir2_node_removename Fix the leak of kernel memory in xfs_dir2_node_removename() when xfs_dir2_leafn_remove() returns an error code. 
Signed-off-by: Mark Tinguely Reviewed-by: Ben Myers Signed-off-by: Ben Myers --- fs/xfs/xfs_dir2_node.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 56369d4509d5..48c7d18f68c3 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -2067,12 +2067,12 @@ xfs_dir2_node_lookup( */ int /* error */ xfs_dir2_node_removename( - xfs_da_args_t *args) /* operation arguments */ + struct xfs_da_args *args) /* operation arguments */ { - xfs_da_state_blk_t *blk; /* leaf block */ + struct xfs_da_state_blk *blk; /* leaf block */ int error; /* error return value */ int rval; /* operation return value */ - xfs_da_state_t *state; /* btree cursor */ + struct xfs_da_state *state; /* btree cursor */ trace_xfs_dir2_node_removename(args); @@ -2084,19 +2084,18 @@ xfs_dir2_node_removename( state->mp = args->dp->i_mount; state->blocksize = state->mp->m_dirblksize; state->node_ents = state->mp->m_dir_node_ents; - /* - * Look up the entry we're deleting, set up the cursor. - */ + + /* Look up the entry we're deleting, set up the cursor. */ error = xfs_da3_node_lookup_int(state, &rval); if (error) - rval = error; - /* - * Didn't find it, upper layer screwed up. - */ + goto out_free; + + /* Didn't find it, upper layer screwed up. */ if (rval != EEXIST) { - xfs_da_state_free(state); - return rval; + error = rval; + goto out_free; } + blk = &state->path.blk[state->path.active - 1]; ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC); ASSERT(state->extravalid); @@ -2107,7 +2106,7 @@ xfs_dir2_node_removename( error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, &state->extrablk, &rval); if (error) - return error; + goto out_free; /* * Fix the hash values up the btree. 
*/ @@ -2122,6 +2121,7 @@ xfs_dir2_node_removename( */ if (!error) error = xfs_dir2_node_to_leaf(state); +out_free: xfs_da_state_free(state); return error; } From c61a9e39f637373929a110ad2a5922a2e8b00f4c Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Fri, 22 Nov 2013 14:04:00 +0800 Subject: [PATCH 10/51] xfs: make quota metadata truncation behavior consistent to user space In xfs_qm_scall_trunc_qfiles(), we ignore the error if we fail to remove the user quota metadata and proceed to remove the group and project quota metadata if they are present. However, in user space, the remove operation will break and return if it fails to remove any kind of quota. Also, for v5 super blocks, we can enable both group and project quota at the same time; in this case the current error handling will overwrite the group error with the project error, even though they might have failed for different reasons. It seems we'd better make the error handling consistent with user space and not try to remove another kind of quota metadata if the previous operation failed. 
Signed-off-by: Jie Liu Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_qm_syscalls.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 437c9198031a..3daf5ea1eb8d 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -278,7 +278,7 @@ xfs_qm_scall_trunc_qfiles( xfs_mount_t *mp, uint flags) { - int error = 0, error2 = 0; + int error; if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { xfs_debug(mp, "%s: flags=%x m_qflags=%x", @@ -286,14 +286,20 @@ xfs_qm_scall_trunc_qfiles( return XFS_ERROR(EINVAL); } - if (flags & XFS_DQ_USER) + if (flags & XFS_DQ_USER) { error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); - if (flags & XFS_DQ_GROUP) - error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); + if (error) + return error; + } + if (flags & XFS_DQ_GROUP) { + error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); + if (error) + return error; + } if (flags & XFS_DQ_PROJ) - error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino); + error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino); - return error ? error : error2; + return error; } /* From afbd123db4e72d5fe44db235976af64a22b32976 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Sat, 23 Nov 2013 00:15:43 +0800 Subject: [PATCH 11/51] xfs: integrate xfs_quota_priv header file to xfs_qm The xfs_quota_priv header file is only included by xfs_qm header and there is no much users for its contents, hence we can move those stuff to xfs_qm header file and kill it. This patch also remove an unused macro DQFLAGTO_TYPESTR. 
Signed-off-by: Jie Liu Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_qm.h | 18 +++++++++++++++++- fs/xfs/xfs_quota_priv.h | 42 ----------------------------------------- 2 files changed, 17 insertions(+), 43 deletions(-) delete mode 100644 fs/xfs/xfs_quota_priv.h diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index a788b66a5cb1..797fd4636273 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -20,12 +20,28 @@ #include "xfs_dquot_item.h" #include "xfs_dquot.h" -#include "xfs_quota_priv.h" struct xfs_inode; extern struct kmem_zone *xfs_qm_dqtrxzone; +/* + * Number of bmaps that we ask from bmapi when doing a quotacheck. + * We make this restriction to keep the memory usage to a minimum. + */ +#define XFS_DQITER_MAP_SIZE 10 + +#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ + !dqp->q_core.d_blk_hardlimit && \ + !dqp->q_core.d_blk_softlimit && \ + !dqp->q_core.d_rtb_hardlimit && \ + !dqp->q_core.d_rtb_softlimit && \ + !dqp->q_core.d_ino_hardlimit && \ + !dqp->q_core.d_ino_softlimit && \ + !dqp->q_core.d_bcount && \ + !dqp->q_core.d_rtbcount && \ + !dqp->q_core.d_icount) + /* * This defines the unit of allocation of dquots. * Currently, it is just one file system block, and a 4K blk contains 30 diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h deleted file mode 100644 index 6d86219d93da..000000000000 --- a/fs/xfs/xfs_quota_priv.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_QUOTA_PRIV_H__ -#define __XFS_QUOTA_PRIV_H__ - -/* - * Number of bmaps that we ask from bmapi when doing a quotacheck. - * We make this restriction to keep the memory usage to a minimum. - */ -#define XFS_DQITER_MAP_SIZE 10 - -#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ - !dqp->q_core.d_blk_hardlimit && \ - !dqp->q_core.d_blk_softlimit && \ - !dqp->q_core.d_rtb_hardlimit && \ - !dqp->q_core.d_rtb_softlimit && \ - !dqp->q_core.d_ino_hardlimit && \ - !dqp->q_core.d_ino_softlimit && \ - !dqp->q_core.d_bcount && \ - !dqp->q_core.d_rtbcount && \ - !dqp->q_core.d_icount) - -#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ - (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ - (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) - -#endif /* __XFS_QUOTA_PRIV_H__ */ From 37eb9706ebf5b99d14c6086cdeef2c2f73f9c9fb Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Tue, 26 Nov 2013 21:38:54 +0800 Subject: [PATCH 12/51] xfs: fix false assertion at xfs_qm_vop_create_dqattach After the previous fix, there still has another ASSERT failure if turning off any type of quota while fsstress is running at the same time. Backtrace in this case: [ 50.867897] XFS: Assertion failed: XFS_IS_GQUOTA_ON(mp), file: fs/xfs/xfs_qm.c, line: 2118 [ 50.867924] ------------[ cut here ]------------ ... 
[ 50.867957] Kernel BUG at ffffffffa0b55a32 [verbose debug info unavailable] [ 50.867999] invalid opcode: 0000 [#1] SMP [ 50.869407] Call Trace: [ 50.869446] [] xfs_qm_vop_create_dqattach+0x19a/0x2d0 [xfs] [ 50.869512] [] xfs_create+0x5c5/0x6a0 [xfs] [ 50.869564] [] xfs_vn_mknod+0xac/0x1d0 [xfs] [ 50.869615] [] xfs_vn_mkdir+0x16/0x20 [xfs] [ 50.869655] [] vfs_mkdir+0x95/0x130 [ 50.869689] [] SyS_mkdirat+0xaa/0xe0 [ 50.869723] [] SyS_mkdir+0x19/0x20 [ 50.869757] [] system_call_fastpath+0x1a/0x1f [ 50.869793] Code: 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 [ 50.870003] RIP [] assfail+0x22/0x30 [xfs] [ 50.870050] RSP [ 50.879251] ---[ end trace c93a2b342341c65b ]--- We're hitting the ASSERT(XFS_IS_*QUOTA_ON(mp)) in xfs_qm_vop_create_dqattach(), however the assertion itself is not right IMHO. While performing quota off, we firstly clear the XFS_*QUOTA_ACTIVE bit(s) from struct xfs_mount without taking any special locks, see xfs_qm_scall_quotaoff(). Hence there is no guarantee that the desired quota is still active. 
Signed-off-by: Jie Liu Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_qm.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 14a4996cfec6..588e4909c589 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -2082,24 +2082,21 @@ xfs_qm_vop_create_dqattach( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - if (udqp) { + if (udqp && XFS_IS_UQUOTA_ON(mp)) { ASSERT(ip->i_udquot == NULL); - ASSERT(XFS_IS_UQUOTA_ON(mp)); ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); ip->i_udquot = xfs_qm_dqhold(udqp); xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); } - if (gdqp) { + if (gdqp && XFS_IS_GQUOTA_ON(mp)) { ASSERT(ip->i_gdquot == NULL); - ASSERT(XFS_IS_GQUOTA_ON(mp)); ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); ip->i_gdquot = xfs_qm_dqhold(gdqp); xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); } - if (pdqp) { + if (pdqp && XFS_IS_PQUOTA_ON(mp)) { ASSERT(ip->i_pdquot == NULL); - ASSERT(XFS_IS_PQUOTA_ON(mp)); ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); ip->i_pdquot = xfs_qm_dqhold(pdqp); From 0c3d88dfcedf92b28d759182ecb33f2808dc3e59 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Nov 2013 05:10:40 -0800 Subject: [PATCH 13/51] xfs: tiny xfs_setattr_mode cleanup Remove the pointless tp argument, and properly align the local variable declarations. 
Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Ben Myers --- fs/xfs/xfs_iops.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 27e0e544e963..e7f4e4f4eab9 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -459,14 +459,12 @@ xfs_vn_getattr( static void xfs_setattr_mode( - struct xfs_trans *tp, struct xfs_inode *ip, struct iattr *iattr) { - struct inode *inode = VFS_I(ip); - umode_t mode = iattr->ia_mode; + struct inode *inode = VFS_I(ip); + umode_t mode = iattr->ia_mode; - ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ip->i_d.di_mode &= S_IFMT; @@ -633,7 +631,7 @@ xfs_setattr_nonsize( * Change file access modes. */ if (mask & ATTR_MODE) - xfs_setattr_mode(tp, ip, iattr); + xfs_setattr_mode(ip, iattr); /* * Change file access or modified times. @@ -871,7 +869,7 @@ xfs_setattr_size( * Change file access modes. */ if (mask & ATTR_MODE) - xfs_setattr_mode(tp, ip, iattr); + xfs_setattr_mode(ip, iattr); if (mask & ATTR_CTIME) { inode->i_ctime = iattr->ia_ctime; From c91c46c12768daac8486dff0f74bc52c2ec974cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Nov 2013 05:10:52 -0800 Subject: [PATCH 14/51] xfs: add xfs_setattr_time Split out a xfs_setattr_time helper to share code between truncate and regular setattr similar to xfs_setattr_mode. I might also have another caller growing for this in the near future. 
Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Signed-off-by: Ben Myers --- fs/xfs/xfs_iops.c | 66 +++++++++++++++++++++-------------------------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index e7f4e4f4eab9..5762282895a5 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -474,6 +474,32 @@ xfs_setattr_mode( inode->i_mode |= mode & ~S_IFMT; } +static void +xfs_setattr_time( + struct xfs_inode *ip, + struct iattr *iattr) +{ + struct inode *inode = VFS_I(ip); + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (iattr->ia_valid & ATTR_ATIME) { + inode->i_atime = iattr->ia_atime; + ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; + ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; + } + if (iattr->ia_valid & ATTR_CTIME) { + inode->i_ctime = iattr->ia_ctime; + ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + } + if (iattr->ia_valid & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + } +} + int xfs_setattr_nonsize( struct xfs_inode *ip, @@ -627,30 +653,10 @@ xfs_setattr_nonsize( } } - /* - * Change file access modes. - */ if (mask & ATTR_MODE) xfs_setattr_mode(ip, iattr); - - /* - * Change file access or modified times. 
- */ - if (mask & ATTR_ATIME) { - inode->i_atime = iattr->ia_atime; - ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; - ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; - } - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - } + if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) + xfs_setattr_time(ip, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -865,22 +871,10 @@ xfs_setattr_size( xfs_inode_clear_eofblocks_tag(ip); } - /* - * Change file access modes. - */ if (mask & ATTR_MODE) xfs_setattr_mode(ip, iattr); - - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - } + if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) + xfs_setattr_time(ip, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); From 5a01dd54f4a7fb513062070c5acef20d13cad980 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Tue, 26 Nov 2013 21:38:34 +0800 Subject: [PATCH 15/51] xfs: fix assertion failure at xfs_setattr_nonsize For CRC enabled v5 super block, change a file's ownership can simply trigger an ASSERT failure at xfs_setattr_nonsize() if both group and project quota are enabled, i.e, [ 305.337609] XFS: Assertion failed: !XFS_IS_PQUOTA_ON(mp), file: fs/xfs/xfs_iops.c, line: 621 [ 305.339250] Kernel BUG at ffffffffa0a7fa32 [verbose debug info unavailable] [ 305.383939] Call Trace: [ 305.385536] [] xfs_setattr_nonsize+0x69a/0x720 [xfs] [ 305.387142] [] xfs_vn_setattr+0x29/0x70 [xfs] [ 305.388727] [] notify_change+0x1a8/0x350 [ 
305.390298] [] chown_common+0xfd/0x110 [ 305.391868] [] SyS_fchownat+0xaf/0x110 [ 305.393440] [] SyS_lchown+0x20/0x30 [ 305.394995] [] system_call_fastpath+0x1a/0x1f [ 305.399870] RIP [] assfail+0x22/0x30 [xfs] This fix adjust the assertion to check if the super block support both quota inodes or not. Signed-off-by: Jie Liu Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_iops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 5762282895a5..0ce1d759156e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -642,7 +642,8 @@ xfs_setattr_nonsize( } if (!gid_eq(igid, gid)) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { - ASSERT(!XFS_IS_PQUOTA_ON(mp)); + ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) || + !XFS_IS_PQUOTA_ON(mp)); ASSERT(mask & ATTR_GID); ASSERT(gdqp); olddquot2 = xfs_qm_vop_chown(tp, ip, From df8052e7dae00bde6f21b40b6e3e1099770f3afc Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Tue, 26 Nov 2013 21:38:49 +0800 Subject: [PATCH 16/51] xfs: fix infinite loop by detaching the group/project hints from user dquot xfs_quota(8) will hang up if trying to turn group/project quota off before the user quota is off, this could be 100% reproduced by: # mount -ouquota,gquota /dev/sda7 /xfs # mkdir /xfs/test # xfs_quota -xc 'off -g' /xfs <-- hangs up # echo w > /proc/sysrq-trigger # dmesg SysRq : Show Blocked State task PC stack pid father xfs_quota D 0000000000000000 0 27574 2551 0x00000000 [snip] Call Trace: [] schedule+0xad/0xc0 [] schedule_timeout+0x35e/0x3c0 [] ? mark_held_locks+0x176/0x1c0 [] ? call_timer_fn+0x2c0/0x2c0 [] ? xfs_qm_shrink_count+0x30/0x30 [xfs] [] schedule_timeout_uninterruptible+0x26/0x30 [] xfs_qm_dquot_walk+0x235/0x260 [xfs] [] ? xfs_perag_get+0x1d8/0x2d0 [xfs] [] ? xfs_perag_get+0x5/0x2d0 [xfs] [] ? xfs_inode_ag_iterator+0xae/0xf0 [xfs] [] ? xfs_trans_free_dqinfo+0x50/0x50 [xfs] [] ? 
xfs_inode_ag_iterator+0xcf/0xf0 [xfs] [] xfs_qm_dqpurge_all+0x66/0xb0 [xfs] [] xfs_qm_scall_quotaoff+0x20a/0x5f0 [xfs] [] xfs_fs_set_xstate+0x136/0x180 [xfs] [] do_quotactl+0x53a/0x6b0 [] ? iput+0x5b/0x90 [] SyS_quotactl+0x167/0x1d0 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b It's fine if we turn user quota off at first, then turn off other kind of quotas if they are enabled since the group/project dquot refcount is decreased to zero once the user quota if off. Otherwise, those dquots refcount is non-zero due to the user dquot might refer to them as hint(s). Hence, above operation cause an infinite loop at xfs_qm_dquot_walk() while trying to purge dquot cache. This problem has been around since Linux 3.4, it was introduced by: [ b84a3a9675 xfs: remove the per-filesystem list of dquots ] Originally we will release the group dquot pointers because the user dquots maybe carrying around as a hint via xfs_qm_detach_gdquots(). However, with above change, there is no such work to be done before purging group/project dquot cache. In order to solve this problem, this patch introduces a special routine xfs_qm_dqpurge_hints(), and it would release the group/project dquot pointers the user dquots maybe carrying around as a hint, and then it will proceed to purge the user dquot cache if requested. 
Cc: stable@vger.kernel.org Signed-off-by: Jie Liu Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_qm.c | 71 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 21 deletions(-) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 588e4909c589..dd88f0e27bd8 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -134,8 +134,6 @@ xfs_qm_dqpurge( { struct xfs_mount *mp = dqp->q_mount; struct xfs_quotainfo *qi = mp->m_quotainfo; - struct xfs_dquot *gdqp = NULL; - struct xfs_dquot *pdqp = NULL; xfs_dqlock(dqp); if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { @@ -143,21 +141,6 @@ xfs_qm_dqpurge( return EAGAIN; } - /* - * If this quota has a hint attached, prepare for releasing it now. - */ - gdqp = dqp->q_gdquot; - if (gdqp) { - xfs_dqlock(gdqp); - dqp->q_gdquot = NULL; - } - - pdqp = dqp->q_pdquot; - if (pdqp) { - xfs_dqlock(pdqp); - dqp->q_pdquot = NULL; - } - dqp->dq_flags |= XFS_DQ_FREEING; xfs_dqflock(dqp); @@ -206,11 +189,47 @@ xfs_qm_dqpurge( XFS_STATS_DEC(xs_qm_dquot_unused); xfs_qm_dqdestroy(dqp); + return 0; +} + +/* + * Release the group or project dquot pointers the user dquots maybe carrying + * around as a hint, and proceed to purge the user dquot cache if requested. 
+*/ +STATIC int +xfs_qm_dqpurge_hints( + struct xfs_dquot *dqp, + void *data) +{ + struct xfs_dquot *gdqp = NULL; + struct xfs_dquot *pdqp = NULL; + uint flags = *((uint *)data); + + xfs_dqlock(dqp); + if (dqp->dq_flags & XFS_DQ_FREEING) { + xfs_dqunlock(dqp); + return EAGAIN; + } + + /* If this quota has a hint attached, prepare for releasing it now */ + gdqp = dqp->q_gdquot; + if (gdqp) + dqp->q_gdquot = NULL; + + pdqp = dqp->q_pdquot; + if (pdqp) + dqp->q_pdquot = NULL; + + xfs_dqunlock(dqp); if (gdqp) - xfs_qm_dqput(gdqp); + xfs_qm_dqrele(gdqp); if (pdqp) - xfs_qm_dqput(pdqp); + xfs_qm_dqrele(pdqp); + + if (flags & XFS_QMOPT_UQUOTA) + return xfs_qm_dqpurge(dqp, NULL); + return 0; } @@ -222,8 +241,18 @@ xfs_qm_dqpurge_all( struct xfs_mount *mp, uint flags) { - if (flags & XFS_QMOPT_UQUOTA) - xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL); + /* + * We have to release group/project dquot hint(s) from the user dquot + * at first if they are there, otherwise we would run into an infinite + * loop while walking through radix tree to purge other type of dquots + * since their refcount is not zero if the user dquot refers to them + * as hint. + * + * Call the special xfs_qm_dqpurge_hints() will end up go through the + * general xfs_qm_dqpurge() against user dquot cache if requested. + */ + xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge_hints, &flags); + if (flags & XFS_QMOPT_GQUOTA) xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL); if (flags & XFS_QMOPT_PQUOTA) From b3f03bac8132207a20286d5602eda64500c19724 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 3 Dec 2013 23:50:57 +1100 Subject: [PATCH 17/51] xfs: xfs_dir2_block_to_sf temp buffer allocation fails If we are using a large directory block size, and memory becomes fragmented, we can get memory allocation failures trying to kmem_alloc(64k) for a temporary buffer. However, there is no need for a directory buffer sized allocation, as the end result ends up in the inode literal area. 
This is, at most, slightly less than 2k of space, and hence we don't need an allocation larger than that for a temporary buffer. Signed-off-by: Dave Chinner Reviewed-by: Ben Myers Signed-off-by: Ben Myers --- fs/xfs/xfs_dir2_sf.c | 58 ++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index aafc6e46cb58..3725fb1b902b 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -170,6 +170,7 @@ xfs_dir2_block_to_sf( char *ptr; /* current data pointer */ xfs_dir2_sf_entry_t *sfep; /* shortform entry */ xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */ + xfs_dir2_sf_hdr_t *dst; /* temporary data buffer */ trace_xfs_dir2_block_to_sf(args); @@ -177,35 +178,20 @@ xfs_dir2_block_to_sf( mp = dp->i_mount; /* - * Make a copy of the block data, so we can shrink the inode - * and add local data. + * allocate a temporary destination buffer the size of the inode + * to format the data into. Once we have formatted the data, we + * can free the block and copy the formatted data into the inode literal + * area. */ - hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP); - memcpy(hdr, bp->b_addr, mp->m_dirblksize); - logflags = XFS_ILOG_CORE; - if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) { - ASSERT(error != ENOSPC); - goto out; - } + dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP); + hdr = bp->b_addr; - /* - * The buffer is now unconditionally gone, whether - * xfs_dir2_shrink_inode worked or not. - * - * Convert the inode to local format. - */ - dp->i_df.if_flags &= ~XFS_IFEXTENTS; - dp->i_df.if_flags |= XFS_IFINLINE; - dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; - ASSERT(dp->i_df.if_bytes == 0); - xfs_idata_realloc(dp, size, XFS_DATA_FORK); - logflags |= XFS_ILOG_DDATA; /* * Copy the header into the newly allocate local space. 
*/ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = (xfs_dir2_sf_hdr_t *)dst; memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); - dp->i_d.di_size = size; + /* * Set up to loop over the block's entries. */ @@ -258,10 +244,34 @@ xfs_dir2_block_to_sf( ptr += dp->d_ops->data_entsize(dep->namelen); } ASSERT((char *)sfep - (char *)sfp == size); + + /* now we are done with the block, we can shrink the inode */ + logflags = XFS_ILOG_CORE; + error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp); + if (error) { + ASSERT(error != ENOSPC); + goto out; + } + + /* + * The buffer is now unconditionally gone, whether + * xfs_dir2_shrink_inode worked or not. + * + * Convert the inode to local format and copy the data in. + */ + dp->i_df.if_flags &= ~XFS_IFEXTENTS; + dp->i_df.if_flags |= XFS_IFINLINE; + dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; + ASSERT(dp->i_df.if_bytes == 0); + xfs_idata_realloc(dp, size, XFS_DATA_FORK); + + logflags |= XFS_ILOG_DDATA; + memcpy(dp->i_df.if_u1.if_data, dst, size); + dp->i_d.di_size = size; xfs_dir2_sf_check(args); out: xfs_trans_log_inode(args->trans, dp, logflags); - kmem_free(hdr); + kmem_free(dst); return error; } From db10bddc7d4f412bcd8630fc479fa1eb009e325b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 8 Dec 2013 23:33:50 +0900 Subject: [PATCH 18/51] MAINTAINERS: fix incorrect mail address of XFS maintainer When I tried to send patches to the XFS maintainers, I got a returned mail containing a delivery failure message for Dave's address. Dave Chinner's mail address appears to be incorrect, so fix it. 
Signed-off-by: Namjae Jeon Reviewed-by: Ben Myers Signed-off-by: Ben Myers --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f216db847022..f5d0bddb6cfa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9530,7 +9530,7 @@ F: drivers/xen/*swiotlb* XFS FILESYSTEM P: Silicon Graphics Inc -M: Dave Chinner +M: Dave Chinner M: Ben Myers M: xfs@oss.sgi.com L: xfs@oss.sgi.com From 8e825e3a02ff20973154559c33e662cacedc4458 Mon Sep 17 00:00:00 2001 From: Ben Myers Date: Tue, 10 Dec 2013 14:59:31 -0600 Subject: [PATCH 19/51] xfs: fix calculation of freed inode cluster blocks rec.ir_startino is an agino rather than an ino. Use the correct macro when dealing with it in xfs_difree. Signed-off-by: Ben Myers Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index e87719c5bebe..7a728f9fc0be 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1229,7 +1229,7 @@ xfs_difree( } xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, - agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)), + agno, XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)), XFS_IALLOC_BLOCKS(mp), flist, mp); } else { *delete = 0; From f9b395a8ef8f34d19cae2cde361e19c96e097fad Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 22 Nov 2013 10:41:16 +1100 Subject: [PATCH 20/51] xfs: align initial file allocations correctly The function xfs_bmap_isaeof() is used to indicate that an allocation is occurring at or past the end of file, and as such should be aligned to the underlying storage geometry if possible. Commit 27a3f8f ("xfs: introduce xfs_bmap_last_extent") changed the behaviour of this function for empty files - it turned off allocation alignment for this case accidentally. Hence large initial allocations from direct IO are not getting correctly aligned to the underlying geometry, and that is causing write performance to drop in alignment sensitive configurations. 
Fix it by considering allocation into empty files as requiring aligned allocation again. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_bmap.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3ef11b22e750..8401f11f378f 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -1635,7 +1635,7 @@ xfs_bmap_last_extent( * blocks at the end of the file which do not start at the previous data block, * we will try to align the new blocks at stripe unit boundaries. * - * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be + * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be * at, or past the EOF. */ STATIC int @@ -1650,9 +1650,14 @@ xfs_bmap_isaeof( bma->aeof = 0; error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, &is_empty); - if (error || is_empty) + if (error) return error; + if (is_empty) { + bma->aeof = 1; + return 0; + } + /* * Check if we are allocation or past the last extent, or at least into * the last delayed allocated extent. From 9597df6b26a1988a5a04762711149f98ec6ab388 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Dec 2013 11:00:42 +1100 Subject: [PATCH 21/51] xfs: remove duplicate code in xlog_cil_insert_format_items Share code that was previously duplicated in two branches. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_log_cil.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 5eb51fc5eb84..0a7a8cef6019 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -254,29 +254,22 @@ xlog_cil_insert_format_items( */ *diff_iovecs -= lv->lv_niovecs; *diff_len -= lv->lv_buf_len; - - /* Ensure the lv is set up according to ->iop_size */ - lv->lv_niovecs = niovecs; - lv->lv_buf = (char *)lv + buf_size - nbytes; - - lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv); - goto insert; + } else { + /* allocate new data chunk */ + lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS); + lv->lv_item = lip; + lv->lv_size = buf_size; + if (ordered) { + /* track as an ordered logvec */ + ASSERT(lip->li_lv == NULL); + lv->lv_buf_len = XFS_LOG_VEC_ORDERED; + goto insert; + } + lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1]; } - /* allocate new data chunk */ - lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS); - lv->lv_item = lip; - lv->lv_size = buf_size; + /* Ensure the lv is set up according to ->iop_size */ lv->lv_niovecs = niovecs; - if (ordered) { - /* track as an ordered logvec */ - ASSERT(lip->li_lv == NULL); - lv->lv_buf_len = XFS_LOG_VEC_ORDERED; - goto insert; - } - - /* The allocated iovec region lies beyond the log vector. */ - lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1]; /* The allocated data region lies beyond the iovec region */ lv->lv_buf = (char *)lv + buf_size - nbytes; From 7aeb72224120e0c49ba4c93d75f8f0d6a87f6afd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Dec 2013 11:00:43 +1100 Subject: [PATCH 22/51] xfs: refactor xfs_buf_item_format_segment Add two helpers to make the code more readable. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf_item.c | 72 +++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index a64f67ba25d3..a30c1fb1bec6 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -182,6 +182,34 @@ xfs_buf_item_size( trace_xfs_buf_item_size(bip); } +static inline struct xfs_log_iovec * +xfs_buf_item_copy_iovec( + struct xfs_log_iovec *vecp, + struct xfs_buf *bp, + uint offset, + int first_bit, + uint nbits) +{ + offset += first_bit * XFS_BLF_CHUNK; + + vecp->i_type = XLOG_REG_TYPE_BCHUNK; + vecp->i_addr = xfs_buf_offset(bp, offset); + vecp->i_len = nbits * XFS_BLF_CHUNK; + return vecp + 1; +} + +static inline bool +xfs_buf_item_straddle( + struct xfs_buf *bp, + uint offset, + int next_bit, + int last_bit) +{ + return xfs_buf_offset(bp, offset + (next_bit << XFS_BLF_SHIFT)) != + (xfs_buf_offset(bp, offset + (last_bit << XFS_BLF_SHIFT)) + + XFS_BLF_CHUNK); +} + static struct xfs_log_iovec * xfs_buf_item_format_segment( struct xfs_buf_log_item *bip, @@ -196,7 +224,6 @@ xfs_buf_item_format_segment( int last_bit; int next_bit; uint nbits; - uint buffer_offset; /* copy the flags across from the base format item */ blfp->blf_flags = bip->__bli_format.blf_flags; @@ -239,7 +266,6 @@ xfs_buf_item_format_segment( /* * Fill in an iovec for each set of contiguous chunks. */ - last_bit = first_bit; nbits = 1; for (;;) { @@ -252,42 +278,22 @@ xfs_buf_item_format_segment( next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, (uint)last_bit + 1); /* - * If we run out of bits fill in the last iovec and get - * out of the loop. - * Else if we start a new set of bits then fill in the - * iovec for the series we were looking at and start - * counting the bits in the new one. - * Else we're still in the same set of bits so just - * keep counting and scanning. 
+ * If we run out of bits fill in the last iovec and get out of + * the loop. Else if we start a new set of bits then fill in + * the iovec for the series we were looking at and start + * counting the bits in the new one. Else we're still in the + * same set of bits so just keep counting and scanning. */ if (next_bit == -1) { - buffer_offset = offset + first_bit * XFS_BLF_CHUNK; - vecp->i_addr = xfs_buf_offset(bp, buffer_offset); - vecp->i_len = nbits * XFS_BLF_CHUNK; - vecp->i_type = XLOG_REG_TYPE_BCHUNK; + xfs_buf_item_copy_iovec(vecp, bp, offset, + first_bit, nbits); nvecs++; break; - } else if (next_bit != last_bit + 1) { - buffer_offset = offset + first_bit * XFS_BLF_CHUNK; - vecp->i_addr = xfs_buf_offset(bp, buffer_offset); - vecp->i_len = nbits * XFS_BLF_CHUNK; - vecp->i_type = XLOG_REG_TYPE_BCHUNK; + } else if (next_bit != last_bit + 1 || + xfs_buf_item_straddle(bp, offset, next_bit, last_bit)) { + vecp = xfs_buf_item_copy_iovec(vecp, bp, offset, + first_bit, nbits); nvecs++; - vecp++; - first_bit = next_bit; - last_bit = next_bit; - nbits = 1; - } else if (xfs_buf_offset(bp, offset + - (next_bit << XFS_BLF_SHIFT)) != - (xfs_buf_offset(bp, offset + - (last_bit << XFS_BLF_SHIFT)) + - XFS_BLF_CHUNK)) { - buffer_offset = offset + first_bit * XFS_BLF_CHUNK; - vecp->i_addr = xfs_buf_offset(bp, buffer_offset); - vecp->i_len = nbits * XFS_BLF_CHUNK; - vecp->i_type = XLOG_REG_TYPE_BCHUNK; - nvecs++; - vecp++; first_bit = next_bit; last_bit = next_bit; nbits = 1; From ce9641d6c981aad0463b2d1455f0b60e5c8671c5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Dec 2013 11:00:43 +1100 Subject: [PATCH 23/51] xfs: refactor xfs_inode_item_size Split out two helpers to size the data and attribute to make the function more readable. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_inode_item.c | 156 +++++++++++++++++++++------------------- 1 file changed, 82 insertions(+), 74 deletions(-) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 7c0d391f9a6e..050d2540f7b4 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -39,6 +39,85 @@ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip) return container_of(lip, struct xfs_inode_log_item, ili_item); } +STATIC void +xfs_inode_item_data_fork_size( + struct xfs_inode_log_item *iip, + int *nvecs, + int *nbytes) +{ + struct xfs_inode *ip = iip->ili_inode; + + switch (ip->i_d.di_format) { + case XFS_DINODE_FMT_EXTENTS: + if ((iip->ili_fields & XFS_ILOG_DEXT) && + ip->i_d.di_nextents > 0 && + ip->i_df.if_bytes > 0) { + /* worst case, doesn't subtract delalloc extents */ + *nbytes += XFS_IFORK_DSIZE(ip); + *nvecs += 1; + } + break; + case XFS_DINODE_FMT_BTREE: + if ((iip->ili_fields & XFS_ILOG_DBROOT) && + ip->i_df.if_broot_bytes > 0) { + *nbytes += ip->i_df.if_broot_bytes; + *nvecs += 1; + } + break; + case XFS_DINODE_FMT_LOCAL: + if ((iip->ili_fields & XFS_ILOG_DDATA) && + ip->i_df.if_bytes > 0) { + *nbytes += roundup(ip->i_df.if_bytes, 4); + *nvecs += 1; + } + break; + + case XFS_DINODE_FMT_DEV: + case XFS_DINODE_FMT_UUID: + break; + default: + ASSERT(0); + break; + } +} + +STATIC void +xfs_inode_item_attr_fork_size( + struct xfs_inode_log_item *iip, + int *nvecs, + int *nbytes) +{ + struct xfs_inode *ip = iip->ili_inode; + + switch (ip->i_d.di_aformat) { + case XFS_DINODE_FMT_EXTENTS: + if ((iip->ili_fields & XFS_ILOG_AEXT) && + ip->i_d.di_anextents > 0 && + ip->i_afp->if_bytes > 0) { + /* worst case, doesn't subtract unused space */ + *nbytes += XFS_IFORK_ASIZE(ip); + *nvecs += 1; + } + break; + case XFS_DINODE_FMT_BTREE: + if ((iip->ili_fields & XFS_ILOG_ABROOT) && + ip->i_afp->if_broot_bytes > 0) { + *nbytes += 
ip->i_afp->if_broot_bytes; + *nvecs += 1; + } + break; + case XFS_DINODE_FMT_LOCAL: + if ((iip->ili_fields & XFS_ILOG_ADATA) && + ip->i_afp->if_bytes > 0) { + *nbytes += roundup(ip->i_afp->if_bytes, 4); + *nvecs += 1; + } + break; + default: + ASSERT(0); + break; + } +} /* * This returns the number of iovecs needed to log the given inode item. @@ -60,80 +139,9 @@ xfs_inode_item_size( *nbytes += sizeof(struct xfs_inode_log_format) + xfs_icdinode_size(ip->i_d.di_version); - switch (ip->i_d.di_format) { - case XFS_DINODE_FMT_EXTENTS: - if ((iip->ili_fields & XFS_ILOG_DEXT) && - ip->i_d.di_nextents > 0 && - ip->i_df.if_bytes > 0) { - /* worst case, doesn't subtract delalloc extents */ - *nbytes += XFS_IFORK_DSIZE(ip); - *nvecs += 1; - } - break; - - case XFS_DINODE_FMT_BTREE: - if ((iip->ili_fields & XFS_ILOG_DBROOT) && - ip->i_df.if_broot_bytes > 0) { - *nbytes += ip->i_df.if_broot_bytes; - *nvecs += 1; - } - break; - - case XFS_DINODE_FMT_LOCAL: - if ((iip->ili_fields & XFS_ILOG_DDATA) && - ip->i_df.if_bytes > 0) { - *nbytes += roundup(ip->i_df.if_bytes, 4); - *nvecs += 1; - } - break; - - case XFS_DINODE_FMT_DEV: - case XFS_DINODE_FMT_UUID: - break; - - default: - ASSERT(0); - break; - } - - if (!XFS_IFORK_Q(ip)) - return; - - - /* - * Log any necessary attribute data. 
- */ - switch (ip->i_d.di_aformat) { - case XFS_DINODE_FMT_EXTENTS: - if ((iip->ili_fields & XFS_ILOG_AEXT) && - ip->i_d.di_anextents > 0 && - ip->i_afp->if_bytes > 0) { - /* worst case, doesn't subtract unused space */ - *nbytes += XFS_IFORK_ASIZE(ip); - *nvecs += 1; - } - break; - - case XFS_DINODE_FMT_BTREE: - if ((iip->ili_fields & XFS_ILOG_ABROOT) && - ip->i_afp->if_broot_bytes > 0) { - *nbytes += ip->i_afp->if_broot_bytes; - *nvecs += 1; - } - break; - - case XFS_DINODE_FMT_LOCAL: - if ((iip->ili_fields & XFS_ILOG_ADATA) && - ip->i_afp->if_bytes > 0) { - *nbytes += roundup(ip->i_afp->if_bytes, 4); - *nvecs += 1; - } - break; - - default: - ASSERT(0); - break; - } + xfs_inode_item_data_fork_size(iip, nvecs, nbytes); + if (XFS_IFORK_Q(ip)) + xfs_inode_item_attr_fork_size(iip, nvecs, nbytes); } /* From 3de559fbd04d67473b9be2bd183823c40c4b7557 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Dec 2013 11:00:43 +1100 Subject: [PATCH 24/51] xfs: refactor xfs_inode_item_format Split out a function to handle the data and attr fork, as well as a helper for the really old v1 inodes. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_inode_item.c | 167 ++++++++++++++++++++++------------------ 1 file changed, 90 insertions(+), 77 deletions(-) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 050d2540f7b4..2ad12dcf8311 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -180,63 +180,41 @@ xfs_inode_item_format_extents( } /* - * This is called to fill in the vector of log iovecs for the - * given inode log item. It fills the first item with an inode - * log format structure, the second with the on-disk inode structure, - * and a possible third and/or fourth with the inode data/extents/b-tree - * root and inode attributes data/extents/b-tree root. + * If this is a v1 format inode, then we need to log it as such. 
This means + * that we have to copy the link count from the new field to the old. We + * don't have to worry about the new fields, because nothing trusts them as + * long as the old inode version number is there. */ STATIC void -xfs_inode_item_format( - struct xfs_log_item *lip, - struct xfs_log_iovec *vecp) +xfs_inode_item_format_v1_inode( + struct xfs_inode *ip) { - struct xfs_inode_log_item *iip = INODE_ITEM(lip); - struct xfs_inode *ip = iip->ili_inode; - uint nvecs; - size_t data_bytes; - xfs_mount_t *mp; - - vecp->i_addr = &iip->ili_format; - vecp->i_len = sizeof(xfs_inode_log_format_t); - vecp->i_type = XLOG_REG_TYPE_IFORMAT; - vecp++; - nvecs = 1; - - vecp->i_addr = &ip->i_d; - vecp->i_len = xfs_icdinode_size(ip->i_d.di_version); - vecp->i_type = XLOG_REG_TYPE_ICORE; - vecp++; - nvecs++; - - /* - * If this is really an old format inode, then we need to - * log it as such. This means that we have to copy the link - * count from the new field to the old. We don't have to worry - * about the new fields, because nothing trusts them as long as - * the old inode version number is there. If the superblock already - * has a new version number, then we don't bother converting back. - */ - mp = ip->i_mount; - ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); - if (ip->i_d.di_version == 1) { - if (!xfs_sb_version_hasnlink(&mp->m_sb)) { - /* - * Convert it back. - */ - ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); - ip->i_d.di_onlink = ip->i_d.di_nlink; - } else { - /* - * The superblock version has already been bumped, - * so just make the conversion to the new inode - * format permanent. - */ - ip->i_d.di_version = 2; - ip->i_d.di_onlink = 0; - memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); - } + if (!xfs_sb_version_hasnlink(&ip->i_mount->m_sb)) { + /* + * Convert it back. 
+ */ + ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); + ip->i_d.di_onlink = ip->i_d.di_nlink; + } else { + /* + * The superblock version has already been bumped, + * so just make the conversion to the new inode + * format permanent. + */ + ip->i_d.di_version = 2; + ip->i_d.di_onlink = 0; + memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); } +} + +STATIC struct xfs_log_iovec * +xfs_inode_item_format_data_fork( + struct xfs_inode_log_item *iip, + struct xfs_log_iovec *vecp, + int *nvecs) +{ + struct xfs_inode *ip = iip->ili_inode; + size_t data_bytes; switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: @@ -271,12 +249,11 @@ xfs_inode_item_format( ASSERT(vecp->i_len <= ip->i_df.if_bytes); iip->ili_format.ilf_dsize = vecp->i_len; vecp++; - nvecs++; + (*nvecs)++; } else { iip->ili_fields &= ~XFS_ILOG_DEXT; } break; - case XFS_DINODE_FMT_BTREE: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | @@ -289,7 +266,7 @@ xfs_inode_item_format( vecp->i_len = ip->i_df.if_broot_bytes; vecp->i_type = XLOG_REG_TYPE_IBROOT; vecp++; - nvecs++; + (*nvecs)++; iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; } else { ASSERT(!(iip->ili_fields & @@ -297,7 +274,6 @@ xfs_inode_item_format( iip->ili_fields &= ~XFS_ILOG_DBROOT; } break; - case XFS_DINODE_FMT_LOCAL: iip->ili_fields &= ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | @@ -319,13 +295,12 @@ xfs_inode_item_format( vecp->i_len = (int)data_bytes; vecp->i_type = XLOG_REG_TYPE_ILOCAL; vecp++; - nvecs++; + (*nvecs)++; iip->ili_format.ilf_dsize = (unsigned)data_bytes; } else { iip->ili_fields &= ~XFS_ILOG_DDATA; } break; - case XFS_DINODE_FMT_DEV: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | @@ -335,7 +310,6 @@ xfs_inode_item_format( ip->i_df.if_u2.if_rdev; } break; - case XFS_DINODE_FMT_UUID: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | @@ -345,20 +319,22 @@ xfs_inode_item_format( ip->i_df.if_u2.if_uuid; } break; - default: ASSERT(0); break; } - /* - * If there are no attributes associated with the file, then 
we're done. - */ - if (!XFS_IFORK_Q(ip)) { - iip->ili_fields &= - ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); - goto out; - } + return vecp; +} + +STATIC struct xfs_log_iovec * +xfs_inode_item_format_attr_fork( + struct xfs_inode_log_item *iip, + struct xfs_log_iovec *vecp, + int *nvecs) +{ + struct xfs_inode *ip = iip->ili_inode; + size_t data_bytes; switch (ip->i_d.di_aformat) { case XFS_DINODE_FMT_EXTENTS: @@ -386,12 +362,11 @@ xfs_inode_item_format( #endif iip->ili_format.ilf_asize = vecp->i_len; vecp++; - nvecs++; + (*nvecs)++; } else { iip->ili_fields &= ~XFS_ILOG_AEXT; } break; - case XFS_DINODE_FMT_BTREE: iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); @@ -404,13 +379,12 @@ xfs_inode_item_format( vecp->i_len = ip->i_afp->if_broot_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; vecp++; - nvecs++; + (*nvecs)++; iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; } else { iip->ili_fields &= ~XFS_ILOG_ABROOT; } break; - case XFS_DINODE_FMT_LOCAL: iip->ili_fields &= ~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT); @@ -431,19 +405,59 @@ xfs_inode_item_format( vecp->i_len = (int)data_bytes; vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL; vecp++; - nvecs++; + (*nvecs)++; iip->ili_format.ilf_asize = (unsigned)data_bytes; } else { iip->ili_fields &= ~XFS_ILOG_ADATA; } break; - default: ASSERT(0); break; } -out: + return vecp; +} + +/* + * This is called to fill in the vector of log iovecs for the given inode + * log item. It fills the first item with an inode log format structure, + * the second with the on-disk inode structure, and a possible third and/or + * fourth with the inode data/extents/b-tree root and inode attributes + * data/extents/b-tree root. 
+ */ +STATIC void +xfs_inode_item_format( + struct xfs_log_item *lip, + struct xfs_log_iovec *vecp) +{ + struct xfs_inode_log_item *iip = INODE_ITEM(lip); + struct xfs_inode *ip = iip->ili_inode; + uint nvecs; + + vecp->i_addr = &iip->ili_format; + vecp->i_len = sizeof(xfs_inode_log_format_t); + vecp->i_type = XLOG_REG_TYPE_IFORMAT; + vecp++; + nvecs = 1; + + vecp->i_addr = &ip->i_d; + vecp->i_len = xfs_icdinode_size(ip->i_d.di_version); + vecp->i_type = XLOG_REG_TYPE_ICORE; + vecp++; + nvecs++; + + if (ip->i_d.di_version == 1) + xfs_inode_item_format_v1_inode(ip); + + vecp = xfs_inode_item_format_data_fork(iip, vecp, &nvecs); + if (XFS_IFORK_Q(ip)) { + vecp = xfs_inode_item_format_attr_fork(iip, vecp, &nvecs); + } else { + iip->ili_fields &= + ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); + } + /* * Now update the log format that goes out to disk from the in-core * values. We always write the inode core to make the arithmetic @@ -455,7 +469,6 @@ out: iip->ili_format.ilf_size = nvecs; } - /* * This is called to pin the inode associated with the inode log * item in memory so it cannot be written out. From 1234351cba958cd5d4338172ccfc869a687cd736 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Dec 2013 11:00:43 +1100 Subject: [PATCH 25/51] xfs: introduce xlog_copy_iovec Add a helper to abstract out filling the log iovecs in the log item format handlers. This will allow us to change the way we do the log item formatting more easily. The copy in the name is a bit confusing for now as it just assigns a pointer and lets the CIL code perform the copy, but that will change soon. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf_item.c | 30 ++++------ fs/xfs/xfs_dquot_item.c | 25 ++++----- fs/xfs/xfs_extfree_item.c | 19 +++---- fs/xfs/xfs_icreate_item.c | 9 +-- fs/xfs/xfs_inode_item.c | 115 +++++++++++++++++--------------------- fs/xfs/xfs_log.h | 13 +++++ 6 files changed, 103 insertions(+), 108 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index a30c1fb1bec6..d49419d4bb46 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -182,20 +182,18 @@ xfs_buf_item_size( trace_xfs_buf_item_size(bip); } -static inline struct xfs_log_iovec * +static inline void xfs_buf_item_copy_iovec( - struct xfs_log_iovec *vecp, + struct xfs_log_iovec **vecp, struct xfs_buf *bp, uint offset, int first_bit, uint nbits) { offset += first_bit * XFS_BLF_CHUNK; - - vecp->i_type = XLOG_REG_TYPE_BCHUNK; - vecp->i_addr = xfs_buf_offset(bp, offset); - vecp->i_len = nbits * XFS_BLF_CHUNK; - return vecp + 1; + xlog_copy_iovec(vecp, XLOG_REG_TYPE_BCHUNK, + xfs_buf_offset(bp, offset), + nbits * XFS_BLF_CHUNK); } static inline bool @@ -210,10 +208,10 @@ xfs_buf_item_straddle( XFS_BLF_CHUNK); } -static struct xfs_log_iovec * +static void xfs_buf_item_format_segment( struct xfs_buf_log_item *bip, - struct xfs_log_iovec *vecp, + struct xfs_log_iovec **vecp, uint offset, struct xfs_buf_log_format *blfp) { @@ -245,10 +243,7 @@ xfs_buf_item_format_segment( goto out; } - vecp->i_addr = blfp; - vecp->i_len = base_size; - vecp->i_type = XLOG_REG_TYPE_BFORMAT; - vecp++; + xlog_copy_iovec(vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size); nvecs = 1; if (bip->bli_flags & XFS_BLI_STALE) { @@ -291,8 +286,8 @@ xfs_buf_item_format_segment( break; } else if (next_bit != last_bit + 1 || xfs_buf_item_straddle(bp, offset, next_bit, last_bit)) { - vecp = xfs_buf_item_copy_iovec(vecp, bp, offset, - first_bit, nbits); + xfs_buf_item_copy_iovec(vecp, bp, offset, + first_bit, nbits); nvecs++; first_bit = next_bit; 
last_bit = next_bit; @@ -304,7 +299,6 @@ xfs_buf_item_format_segment( } out: blfp->blf_size = nvecs; - return vecp; } /* @@ -360,8 +354,8 @@ xfs_buf_item_format( } for (i = 0; i < bip->bli_format_count; i++) { - vecp = xfs_buf_item_format_segment(bip, vecp, offset, - &bip->bli_formats[i]); + xfs_buf_item_format_segment(bip, &vecp, offset, + &bip->bli_formats[i]); offset += bp->b_maps[i].bm_len; } diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 92e5f62eefc6..ca354a821838 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -57,20 +57,18 @@ xfs_qm_dquot_logitem_size( STATIC void xfs_qm_dquot_logitem_format( struct xfs_log_item *lip, - struct xfs_log_iovec *logvec) + struct xfs_log_iovec *vecp) { struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); - logvec->i_addr = &qlip->qli_format; - logvec->i_len = sizeof(xfs_dq_logformat_t); - logvec->i_type = XLOG_REG_TYPE_QFORMAT; - logvec++; - logvec->i_addr = &qlip->qli_dquot->q_core; - logvec->i_len = sizeof(xfs_disk_dquot_t); - logvec->i_type = XLOG_REG_TYPE_DQUOT; + xlog_copy_iovec(&vecp, XLOG_REG_TYPE_QFORMAT, + &qlip->qli_format, + sizeof(struct xfs_dq_logformat)); + xlog_copy_iovec(&vecp, XLOG_REG_TYPE_DQUOT, + &qlip->qli_dquot->q_core, + sizeof(struct xfs_disk_dquot)); qlip->qli_format.qlf_size = 2; - } /* @@ -304,15 +302,16 @@ xfs_qm_qoff_logitem_size( STATIC void xfs_qm_qoff_logitem_format( struct xfs_log_item *lip, - struct xfs_log_iovec *log_vector) + struct xfs_log_iovec *vecp) { struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF); - log_vector->i_addr = &qflip->qql_format; - log_vector->i_len = sizeof(xfs_qoff_logitem_t); - log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; + xlog_copy_iovec(&vecp, XLOG_REG_TYPE_QUOTAOFF, + &qflip->qql_format, + sizeof(struct xfs_qoff_logitem)); + qflip->qql_format.qf_size = 1; } diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 3680d04f973f..08823ecbcd82 100644 --- 
a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -26,6 +26,7 @@ #include "xfs_trans_priv.h" #include "xfs_buf_item.h" #include "xfs_extfree_item.h" +#include "xfs_log.h" kmem_zone_t *xfs_efi_zone; @@ -101,7 +102,7 @@ xfs_efi_item_size( STATIC void xfs_efi_item_format( struct xfs_log_item *lip, - struct xfs_log_iovec *log_vector) + struct xfs_log_iovec *vecp) { struct xfs_efi_log_item *efip = EFI_ITEM(lip); @@ -111,10 +112,9 @@ xfs_efi_item_format( efip->efi_format.efi_type = XFS_LI_EFI; efip->efi_format.efi_size = 1; - log_vector->i_addr = &efip->efi_format; - log_vector->i_len = xfs_efi_item_sizeof(efip); - log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT; - ASSERT(log_vector->i_len >= sizeof(xfs_efi_log_format_t)); + xlog_copy_iovec(&vecp, XLOG_REG_TYPE_EFI_FORMAT, + &efip->efi_format, + xfs_efi_item_sizeof(efip)); } @@ -368,7 +368,7 @@ xfs_efd_item_size( STATIC void xfs_efd_item_format( struct xfs_log_item *lip, - struct xfs_log_iovec *log_vector) + struct xfs_log_iovec *vecp) { struct xfs_efd_log_item *efdp = EFD_ITEM(lip); @@ -377,10 +377,9 @@ xfs_efd_item_format( efdp->efd_format.efd_type = XFS_LI_EFD; efdp->efd_format.efd_size = 1; - log_vector->i_addr = &efdp->efd_format; - log_vector->i_len = xfs_efd_item_sizeof(efdp); - log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT; - ASSERT(log_vector->i_len >= sizeof(xfs_efd_log_format_t)); + xlog_copy_iovec(&vecp, XLOG_REG_TYPE_EFD_FORMAT, + &efdp->efd_format, + xfs_efd_item_sizeof(efdp)); } /* diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index d2eaccfa73f4..5751fa8580ee 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -28,6 +28,7 @@ #include "xfs_trans_priv.h" #include "xfs_error.h" #include "xfs_icreate_item.h" +#include "xfs_log.h" kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ @@ -58,13 +59,13 @@ xfs_icreate_item_size( STATIC void xfs_icreate_item_format( struct xfs_log_item *lip, - struct xfs_log_iovec *log_vector) + struct xfs_log_iovec *vecp) 
{ struct xfs_icreate_item *icp = ICR_ITEM(lip); - log_vector->i_addr = (xfs_caddr_t)&icp->ic_format; - log_vector->i_len = sizeof(struct xfs_icreate_log); - log_vector->i_type = XLOG_REG_TYPE_ICREATE; + xlog_copy_iovec(&vecp, XLOG_REG_TYPE_ICREATE, + &icp->ic_format, + sizeof(struct xfs_icreate_log)); } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 2ad12dcf8311..c75e14beff06 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -30,6 +30,7 @@ #include "xfs_trace.h" #include "xfs_trans_priv.h" #include "xfs_dinode.h" +#include "xfs_log.h" kmem_zone_t *xfs_ili_zone; /* inode log item zone */ @@ -159,14 +160,15 @@ xfs_inode_item_size( * here, so always use the physical fork size to determine the size of the * buffer we need to allocate. */ -STATIC void +STATIC int xfs_inode_item_format_extents( struct xfs_inode *ip, - struct xfs_log_iovec *vecp, + struct xfs_log_iovec **vecp, int whichfork, int type) { xfs_bmbt_rec_t *ext_buffer; + int len; ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP); if (whichfork == XFS_DATA_FORK) @@ -174,9 +176,9 @@ xfs_inode_item_format_extents( else ip->i_itemp->ili_aextents_buf = ext_buffer; - vecp->i_addr = ext_buffer; - vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork); - vecp->i_type = type; + len = xfs_iextents_copy(ip, ext_buffer, whichfork); + xlog_copy_iovec(vecp, type, ext_buffer, len); + return len; } /* @@ -207,10 +209,10 @@ xfs_inode_item_format_v1_inode( } } -STATIC struct xfs_log_iovec * +STATIC void xfs_inode_item_format_data_fork( struct xfs_inode_log_item *iip, - struct xfs_log_iovec *vecp, + struct xfs_log_iovec **vecp, int *nvecs) { struct xfs_inode *ip = iip->ili_inode; @@ -237,18 +239,18 @@ xfs_inode_item_format_data_fork( * extents, so just point to the * real extents array. 
*/ - vecp->i_addr = ip->i_df.if_u1.if_extents; - vecp->i_len = ip->i_df.if_bytes; - vecp->i_type = XLOG_REG_TYPE_IEXT; + xlog_copy_iovec(vecp, XLOG_REG_TYPE_IEXT, + ip->i_df.if_u1.if_extents, + ip->i_df.if_bytes); + iip->ili_format.ilf_dsize = ip->i_df.if_bytes; } else #endif { - xfs_inode_item_format_extents(ip, vecp, - XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); + iip->ili_format.ilf_dsize = + xfs_inode_item_format_extents(ip, vecp, + XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); + ASSERT(iip->ili_format.ilf_dsize <= ip->i_df.if_bytes); } - ASSERT(vecp->i_len <= ip->i_df.if_bytes); - iip->ili_format.ilf_dsize = vecp->i_len; - vecp++; (*nvecs)++; } else { iip->ili_fields &= ~XFS_ILOG_DEXT; @@ -262,10 +264,9 @@ xfs_inode_item_format_data_fork( if ((iip->ili_fields & XFS_ILOG_DBROOT) && ip->i_df.if_broot_bytes > 0) { ASSERT(ip->i_df.if_broot != NULL); - vecp->i_addr = ip->i_df.if_broot; - vecp->i_len = ip->i_df.if_broot_bytes; - vecp->i_type = XLOG_REG_TYPE_IBROOT; - vecp++; + xlog_copy_iovec(vecp, XLOG_REG_TYPE_IBROOT, + ip->i_df.if_broot, + ip->i_df.if_broot_bytes); (*nvecs)++; iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; } else { @@ -280,21 +281,18 @@ xfs_inode_item_format_data_fork( XFS_ILOG_DEV | XFS_ILOG_UUID); if ((iip->ili_fields & XFS_ILOG_DDATA) && ip->i_df.if_bytes > 0) { - ASSERT(ip->i_df.if_u1.if_data != NULL); - ASSERT(ip->i_d.di_size > 0); - - vecp->i_addr = ip->i_df.if_u1.if_data; /* * Round i_bytes up to a word boundary. * The underlying memory is guaranteed to * to be there by xfs_idata_realloc(). 
*/ data_bytes = roundup(ip->i_df.if_bytes, 4); - ASSERT((ip->i_df.if_real_bytes == 0) || - (ip->i_df.if_real_bytes == data_bytes)); - vecp->i_len = (int)data_bytes; - vecp->i_type = XLOG_REG_TYPE_ILOCAL; - vecp++; + ASSERT(ip->i_df.if_real_bytes == 0 || + ip->i_df.if_real_bytes == data_bytes); + ASSERT(ip->i_df.if_u1.if_data != NULL); + ASSERT(ip->i_d.di_size > 0); + xlog_copy_iovec(vecp, XLOG_REG_TYPE_ILOCAL, + ip->i_df.if_u1.if_data, data_bytes); (*nvecs)++; iip->ili_format.ilf_dsize = (unsigned)data_bytes; } else { @@ -323,14 +321,12 @@ xfs_inode_item_format_data_fork( ASSERT(0); break; } - - return vecp; } -STATIC struct xfs_log_iovec * +STATIC void xfs_inode_item_format_attr_fork( struct xfs_inode_log_item *iip, - struct xfs_log_iovec *vecp, + struct xfs_log_iovec **vecp, int *nvecs) { struct xfs_inode *ip = iip->ili_inode; @@ -352,16 +348,16 @@ xfs_inode_item_format_attr_fork( * There are not delayed allocation extents * for attributes, so just point at the array. */ - vecp->i_addr = ip->i_afp->if_u1.if_extents; - vecp->i_len = ip->i_afp->if_bytes; - vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; + xlog_copy_iovec(vecp, XLOG_REG_TYPE_IATTR_EXT, + ip->i_afp->if_u1.if_extents, + ip->i_afp->if_bytes); + iip->ili_format.ilf_asize = ip->i_afp->if_bytes; #else ASSERT(iip->ili_aextents_buf == NULL); - xfs_inode_item_format_extents(ip, vecp, + iip->ili_format.ilf_asize = + xfs_inode_item_format_extents(ip, vecp, XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); #endif - iip->ili_format.ilf_asize = vecp->i_len; - vecp++; (*nvecs)++; } else { iip->ili_fields &= ~XFS_ILOG_AEXT; @@ -375,10 +371,9 @@ xfs_inode_item_format_attr_fork( ip->i_afp->if_broot_bytes > 0) { ASSERT(ip->i_afp->if_broot != NULL); - vecp->i_addr = ip->i_afp->if_broot; - vecp->i_len = ip->i_afp->if_broot_bytes; - vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; - vecp++; + xlog_copy_iovec(vecp, XLOG_REG_TYPE_IATTR_BROOT, + ip->i_afp->if_broot, + ip->i_afp->if_broot_bytes); (*nvecs)++; iip->ili_format.ilf_asize = 
ip->i_afp->if_broot_bytes; } else { @@ -391,20 +386,18 @@ xfs_inode_item_format_attr_fork( if ((iip->ili_fields & XFS_ILOG_ADATA) && ip->i_afp->if_bytes > 0) { - ASSERT(ip->i_afp->if_u1.if_data != NULL); - - vecp->i_addr = ip->i_afp->if_u1.if_data; /* * Round i_bytes up to a word boundary. * The underlying memory is guaranteed to * to be there by xfs_idata_realloc(). */ data_bytes = roundup(ip->i_afp->if_bytes, 4); - ASSERT((ip->i_afp->if_real_bytes == 0) || - (ip->i_afp->if_real_bytes == data_bytes)); - vecp->i_len = (int)data_bytes; - vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL; - vecp++; + ASSERT(ip->i_afp->if_real_bytes == 0 || + ip->i_afp->if_real_bytes == data_bytes); + ASSERT(ip->i_afp->if_u1.if_data != NULL); + xlog_copy_iovec(vecp, XLOG_REG_TYPE_IATTR_LOCAL, + ip->i_afp->if_u1.if_data, + data_bytes); (*nvecs)++; iip->ili_format.ilf_asize = (unsigned)data_bytes; } else { @@ -415,8 +408,6 @@ xfs_inode_item_format_attr_fork( ASSERT(0); break; } - - return vecp; } /* @@ -435,24 +426,22 @@ xfs_inode_item_format( struct xfs_inode *ip = iip->ili_inode; uint nvecs; - vecp->i_addr = &iip->ili_format; - vecp->i_len = sizeof(xfs_inode_log_format_t); - vecp->i_type = XLOG_REG_TYPE_IFORMAT; - vecp++; - nvecs = 1; + xlog_copy_iovec(&vecp, XLOG_REG_TYPE_IFORMAT, + &iip->ili_format, + sizeof(struct xfs_inode_log_format)); + nvecs = 1; - vecp->i_addr = &ip->i_d; - vecp->i_len = xfs_icdinode_size(ip->i_d.di_version); - vecp->i_type = XLOG_REG_TYPE_ICORE; - vecp++; + xlog_copy_iovec(&vecp, XLOG_REG_TYPE_ICORE, + &ip->i_d, + xfs_icdinode_size(ip->i_d.di_version)); nvecs++; if (ip->i_d.di_version == 1) xfs_inode_item_format_v1_inode(ip); - vecp = xfs_inode_item_format_data_fork(iip, vecp, &nvecs); + xfs_inode_item_format_data_fork(iip, &vecp, &nvecs); if (XFS_IFORK_Q(ip)) { - vecp = xfs_inode_item_format_attr_fork(iip, vecp, &nvecs); + xfs_inode_item_format_attr_fork(iip, &vecp, &nvecs); } else { iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); diff --git 
a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index e148719e0a5d..384c6c469661 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -30,6 +30,19 @@ struct xfs_log_vec { #define XFS_LOG_VEC_ORDERED (-1) +static inline void * +xlog_copy_iovec(struct xfs_log_iovec **vecp, uint type, void *data, int len) +{ + struct xfs_log_iovec *vec = *vecp; + + vec->i_type = type; + vec->i_addr = data; + vec->i_len = len; + + *vecp = vec + 1; + return vec->i_addr; +} + /* * Structure used to pass callback function and the function's argument * to the log manager. From bde7cff67c39227c6ad503394e19e58debdbc5e3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Dec 2013 11:34:02 +1100 Subject: [PATCH 26/51] xfs: format log items write directly into the linear CIL buffer Instead of setting up pointers to memory locations in iop_format which then get copied into the CIL linear buffer after return, move the copy into the individual inode items. This avoids the need to always have a memory block in the exact same layout that gets written into the log around, and allows the log items to be much more flexible in their in-memory layouts. The only caveat is that we need to properly align the data for each iovec so that we don't have structures misaligned in subsequent iovecs. Note that all log item format routines now need to be careful to modify the copy of the item that was placed into the CIL after calls to xlog_copy_iovec instead of the in-memory copy. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_buf_item.c | 29 ++++++------ fs/xfs/xfs_dquot_item.c | 25 ++++++----- fs/xfs/xfs_extfree_item.c | 10 +++-- fs/xfs/xfs_icreate_item.c | 5 ++- fs/xfs/xfs_inode_item.c | 92 +++++++++++++++++++-------------------- fs/xfs/xfs_log.h | 43 +++++++++++++++--- fs/xfs/xfs_log_cil.c | 41 ++++------------- fs/xfs/xfs_trans.h | 2 +- 8 files changed, 129 insertions(+), 118 deletions(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index d49419d4bb46..764117305438 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -184,6 +184,7 @@ xfs_buf_item_size( static inline void xfs_buf_item_copy_iovec( + struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, struct xfs_buf *bp, uint offset, @@ -191,7 +192,7 @@ xfs_buf_item_copy_iovec( uint nbits) { offset += first_bit * XFS_BLF_CHUNK; - xlog_copy_iovec(vecp, XLOG_REG_TYPE_BCHUNK, + xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BCHUNK, xfs_buf_offset(bp, offset), nbits * XFS_BLF_CHUNK); } @@ -211,13 +212,13 @@ xfs_buf_item_straddle( static void xfs_buf_item_format_segment( struct xfs_buf_log_item *bip, + struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, uint offset, struct xfs_buf_log_format *blfp) { struct xfs_buf *bp = bip->bli_buf; uint base_size; - uint nvecs; int first_bit; int last_bit; int next_bit; @@ -233,18 +234,17 @@ xfs_buf_item_format_segment( */ base_size = xfs_buf_log_format_size(blfp); - nvecs = 0; first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) { /* * If the map is not be dirty in the transaction, mark * the size as zero and do not advance the vector pointer. 
*/ - goto out; + return; } - xlog_copy_iovec(vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size); - nvecs = 1; + blfp = xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size); + blfp->blf_size = 1; if (bip->bli_flags & XFS_BLI_STALE) { /* @@ -254,7 +254,7 @@ xfs_buf_item_format_segment( */ trace_xfs_buf_item_format_stale(bip); ASSERT(blfp->blf_flags & XFS_BLF_CANCEL); - goto out; + return; } @@ -280,15 +280,15 @@ xfs_buf_item_format_segment( * same set of bits so just keep counting and scanning. */ if (next_bit == -1) { - xfs_buf_item_copy_iovec(vecp, bp, offset, + xfs_buf_item_copy_iovec(lv, vecp, bp, offset, first_bit, nbits); - nvecs++; + blfp->blf_size++; break; } else if (next_bit != last_bit + 1 || xfs_buf_item_straddle(bp, offset, next_bit, last_bit)) { - xfs_buf_item_copy_iovec(vecp, bp, offset, + xfs_buf_item_copy_iovec(lv, vecp, bp, offset, first_bit, nbits); - nvecs++; + blfp->blf_size++; first_bit = next_bit; last_bit = next_bit; nbits = 1; @@ -297,8 +297,6 @@ xfs_buf_item_format_segment( nbits++; } } -out: - blfp->blf_size = nvecs; } /* @@ -310,10 +308,11 @@ out: STATIC void xfs_buf_item_format( struct xfs_log_item *lip, - struct xfs_log_iovec *vecp) + struct xfs_log_vec *lv) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; + struct xfs_log_iovec *vecp = NULL; uint offset = 0; int i; @@ -354,7 +353,7 @@ xfs_buf_item_format( } for (i = 0; i < bip->bli_format_count; i++) { - xfs_buf_item_format_segment(bip, &vecp, offset, + xfs_buf_item_format_segment(bip, lv, &vecp, offset, &bip->bli_formats[i]); offset += bp->b_maps[i].bm_len; } diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index ca354a821838..946d588070b0 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -57,18 +57,19 @@ xfs_qm_dquot_logitem_size( STATIC void xfs_qm_dquot_logitem_format( struct xfs_log_item *lip, - struct xfs_log_iovec *vecp) + struct xfs_log_vec *lv) { struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); - - 
xlog_copy_iovec(&vecp, XLOG_REG_TYPE_QFORMAT, - &qlip->qli_format, - sizeof(struct xfs_dq_logformat)); - xlog_copy_iovec(&vecp, XLOG_REG_TYPE_DQUOT, - &qlip->qli_dquot->q_core, - sizeof(struct xfs_disk_dquot)); + struct xfs_log_iovec *vecp = NULL; qlip->qli_format.qlf_size = 2; + + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_QFORMAT, + &qlip->qli_format, + sizeof(struct xfs_dq_logformat)); + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_DQUOT, + &qlip->qli_dquot->q_core, + sizeof(struct xfs_disk_dquot)); } /* @@ -302,17 +303,17 @@ xfs_qm_qoff_logitem_size( STATIC void xfs_qm_qoff_logitem_format( struct xfs_log_item *lip, - struct xfs_log_iovec *vecp) + struct xfs_log_vec *lv) { struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); + struct xfs_log_iovec *vecp = NULL; ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF); + qflip->qql_format.qf_size = 1; - xlog_copy_iovec(&vecp, XLOG_REG_TYPE_QUOTAOFF, + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_QUOTAOFF, &qflip->qql_format, sizeof(struct xfs_qoff_logitem)); - - qflip->qql_format.qf_size = 1; } /* diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 08823ecbcd82..fb7a4c1ce1c5 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -102,9 +102,10 @@ xfs_efi_item_size( STATIC void xfs_efi_item_format( struct xfs_log_item *lip, - struct xfs_log_iovec *vecp) + struct xfs_log_vec *lv) { struct xfs_efi_log_item *efip = EFI_ITEM(lip); + struct xfs_log_iovec *vecp = NULL; ASSERT(atomic_read(&efip->efi_next_extent) == efip->efi_format.efi_nextents); @@ -112,7 +113,7 @@ xfs_efi_item_format( efip->efi_format.efi_type = XFS_LI_EFI; efip->efi_format.efi_size = 1; - xlog_copy_iovec(&vecp, XLOG_REG_TYPE_EFI_FORMAT, + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format, xfs_efi_item_sizeof(efip)); } @@ -368,16 +369,17 @@ xfs_efd_item_size( STATIC void xfs_efd_item_format( struct xfs_log_item *lip, - struct xfs_log_iovec *vecp) + struct xfs_log_vec *lv) { struct xfs_efd_log_item *efdp = 
EFD_ITEM(lip); + struct xfs_log_iovec *vecp = NULL; ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents); efdp->efd_format.efd_type = XFS_LI_EFD; efdp->efd_format.efd_size = 1; - xlog_copy_iovec(&vecp, XLOG_REG_TYPE_EFD_FORMAT, + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format, xfs_efd_item_sizeof(efdp)); } diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 5751fa8580ee..7e4549233251 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -59,11 +59,12 @@ xfs_icreate_item_size( STATIC void xfs_icreate_item_format( struct xfs_log_item *lip, - struct xfs_log_iovec *vecp) + struct xfs_log_vec *lv) { struct xfs_icreate_item *icp = ICR_ITEM(lip); + struct xfs_log_iovec *vecp = NULL; - xlog_copy_iovec(&vecp, XLOG_REG_TYPE_ICREATE, + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICREATE, &icp->ic_format, sizeof(struct xfs_icreate_log)); } diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index c75e14beff06..6ab318f80c96 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -163,6 +163,7 @@ xfs_inode_item_size( STATIC int xfs_inode_item_format_extents( struct xfs_inode *ip, + struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, int whichfork, int type) @@ -177,7 +178,7 @@ xfs_inode_item_format_extents( ip->i_itemp->ili_aextents_buf = ext_buffer; len = xfs_iextents_copy(ip, ext_buffer, whichfork); - xlog_copy_iovec(vecp, type, ext_buffer, len); + xlog_copy_iovec(lv, vecp, type, ext_buffer, len); return len; } @@ -212,8 +213,9 @@ xfs_inode_item_format_v1_inode( STATIC void xfs_inode_item_format_data_fork( struct xfs_inode_log_item *iip, - struct xfs_log_iovec **vecp, - int *nvecs) + struct xfs_inode_log_format *ilf, + struct xfs_log_vec *lv, + struct xfs_log_iovec **vecp) { struct xfs_inode *ip = iip->ili_inode; size_t data_bytes; @@ -239,19 +241,19 @@ xfs_inode_item_format_data_fork( * extents, so just point to the * real extents array. 
*/ - xlog_copy_iovec(vecp, XLOG_REG_TYPE_IEXT, + xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IEXT, ip->i_df.if_u1.if_extents, ip->i_df.if_bytes); - iip->ili_format.ilf_dsize = ip->i_df.if_bytes; + ilf->ilf_dsize = ip->i_df.if_bytes; } else #endif { - iip->ili_format.ilf_dsize = - xfs_inode_item_format_extents(ip, vecp, + ilf->ilf_dsize = + xfs_inode_item_format_extents(ip, lv, vecp, XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); ASSERT(iip->ili_format.ilf_dsize <= ip->i_df.if_bytes); } - (*nvecs)++; + ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_DEXT; } @@ -264,11 +266,11 @@ xfs_inode_item_format_data_fork( if ((iip->ili_fields & XFS_ILOG_DBROOT) && ip->i_df.if_broot_bytes > 0) { ASSERT(ip->i_df.if_broot != NULL); - xlog_copy_iovec(vecp, XLOG_REG_TYPE_IBROOT, + xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT, ip->i_df.if_broot, ip->i_df.if_broot_bytes); - (*nvecs)++; - iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; + ilf->ilf_dsize = ip->i_df.if_broot_bytes; + ilf->ilf_size++; } else { ASSERT(!(iip->ili_fields & XFS_ILOG_DBROOT)); @@ -291,10 +293,10 @@ xfs_inode_item_format_data_fork( ip->i_df.if_real_bytes == data_bytes); ASSERT(ip->i_df.if_u1.if_data != NULL); ASSERT(ip->i_d.di_size > 0); - xlog_copy_iovec(vecp, XLOG_REG_TYPE_ILOCAL, + xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL, ip->i_df.if_u1.if_data, data_bytes); - (*nvecs)++; - iip->ili_format.ilf_dsize = (unsigned)data_bytes; + ilf->ilf_dsize = (unsigned)data_bytes; + ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_DDATA; } @@ -303,19 +305,15 @@ xfs_inode_item_format_data_fork( iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_UUID); - if (iip->ili_fields & XFS_ILOG_DEV) { - iip->ili_format.ilf_u.ilfu_rdev = - ip->i_df.if_u2.if_rdev; - } + if (iip->ili_fields & XFS_ILOG_DEV) + ilf->ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev; break; case XFS_DINODE_FMT_UUID: iip->ili_fields &= ~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT | XFS_ILOG_DEV); - if (iip->ili_fields & 
XFS_ILOG_UUID) { - iip->ili_format.ilf_u.ilfu_uuid = - ip->i_df.if_u2.if_uuid; - } + if (iip->ili_fields & XFS_ILOG_UUID) + ilf->ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid; break; default: ASSERT(0); @@ -326,8 +324,9 @@ xfs_inode_item_format_data_fork( STATIC void xfs_inode_item_format_attr_fork( struct xfs_inode_log_item *iip, - struct xfs_log_iovec **vecp, - int *nvecs) + struct xfs_inode_log_format *ilf, + struct xfs_log_vec *lv, + struct xfs_log_iovec **vecp) { struct xfs_inode *ip = iip->ili_inode; size_t data_bytes; @@ -348,17 +347,17 @@ xfs_inode_item_format_attr_fork( * There are not delayed allocation extents * for attributes, so just point at the array. */ - xlog_copy_iovec(vecp, XLOG_REG_TYPE_IATTR_EXT, + xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT, ip->i_afp->if_u1.if_extents, ip->i_afp->if_bytes); - iip->ili_format.ilf_asize = ip->i_afp->if_bytes; + ilf->ilf_asize = ip->i_afp->if_bytes; #else ASSERT(iip->ili_aextents_buf == NULL); - iip->ili_format.ilf_asize = - xfs_inode_item_format_extents(ip, vecp, + ilf->ilf_asize = + xfs_inode_item_format_extents(ip, lv, vecp, XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); #endif - (*nvecs)++; + ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_AEXT; } @@ -371,11 +370,11 @@ xfs_inode_item_format_attr_fork( ip->i_afp->if_broot_bytes > 0) { ASSERT(ip->i_afp->if_broot != NULL); - xlog_copy_iovec(vecp, XLOG_REG_TYPE_IATTR_BROOT, + xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT, ip->i_afp->if_broot, ip->i_afp->if_broot_bytes); - (*nvecs)++; - iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; + ilf->ilf_asize = ip->i_afp->if_broot_bytes; + ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_ABROOT; } @@ -395,11 +394,11 @@ xfs_inode_item_format_attr_fork( ASSERT(ip->i_afp->if_real_bytes == 0 || ip->i_afp->if_real_bytes == data_bytes); ASSERT(ip->i_afp->if_u1.if_data != NULL); - xlog_copy_iovec(vecp, XLOG_REG_TYPE_IATTR_LOCAL, + xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL, ip->i_afp->if_u1.if_data, 
data_bytes); - (*nvecs)++; - iip->ili_format.ilf_asize = (unsigned)data_bytes; + ilf->ilf_asize = (unsigned)data_bytes; + ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_ADATA; } @@ -420,28 +419,28 @@ xfs_inode_item_format_attr_fork( STATIC void xfs_inode_item_format( struct xfs_log_item *lip, - struct xfs_log_iovec *vecp) + struct xfs_log_vec *lv) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - uint nvecs; + struct xfs_inode_log_format *ilf; + struct xfs_log_iovec *vecp = NULL; - xlog_copy_iovec(&vecp, XLOG_REG_TYPE_IFORMAT, + ilf = xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT, &iip->ili_format, sizeof(struct xfs_inode_log_format)); - nvecs = 1; - - xlog_copy_iovec(&vecp, XLOG_REG_TYPE_ICORE, - &ip->i_d, - xfs_icdinode_size(ip->i_d.di_version)); - nvecs++; + ilf->ilf_size = 1; if (ip->i_d.di_version == 1) xfs_inode_item_format_v1_inode(ip); + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE, + &ip->i_d, + xfs_icdinode_size(ip->i_d.di_version)); + ilf->ilf_size++; - xfs_inode_item_format_data_fork(iip, &vecp, &nvecs); + xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); if (XFS_IFORK_Q(ip)) { - xfs_inode_item_format_attr_fork(iip, &vecp, &nvecs); + xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp); } else { iip->ili_fields &= ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); @@ -455,7 +454,6 @@ xfs_inode_item_format( */ iip->ili_format.ilf_fields = XFS_ILOG_CORE | (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); - iip->ili_format.ilf_size = nvecs; } /* diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 384c6c469661..b0f4ef77fa70 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -31,18 +31,51 @@ struct xfs_log_vec { #define XFS_LOG_VEC_ORDERED (-1) static inline void * -xlog_copy_iovec(struct xfs_log_iovec **vecp, uint type, void *data, int len) +xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, + uint type) { struct xfs_log_iovec *vec = *vecp; - vec->i_type = type; - vec->i_addr 
= data; - vec->i_len = len; + if (vec) { + ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs); + vec++; + } else { + vec = &lv->lv_iovecp[0]; + } - *vecp = vec + 1; + vec->i_type = type; + vec->i_addr = lv->lv_buf + lv->lv_buf_len; + + ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t))); + + *vecp = vec; return vec->i_addr; } +static inline void +xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len) +{ + /* + * We need to make sure the next buffer is naturally aligned for the + * biggest basic data type we put into it. We already accounted for + * this when sizing the buffer. + */ + lv->lv_buf_len += round_up(len, sizeof(uint64_t)); + vec->i_len = len; +} + +static inline void * +xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, + uint type, void *data, int len) +{ + void *buf; + + buf = xlog_prepare_iovec(lv, vecp, type); + memcpy(buf, data, len); + xlog_finish_iovec(lv, *vecp, len); + return buf; +} + /* * Structure used to pass callback function and the function's argument * to the log manager. diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 0a7a8cef6019..cdebd832c3db 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -82,36 +82,6 @@ xlog_cil_init_post_recovery( log->l_curr_block); } -STATIC int -xlog_cil_lv_item_format( - struct xfs_log_item *lip, - struct xfs_log_vec *lv) -{ - int index; - char *ptr; - - /* format new vectors into array */ - lip->li_ops->iop_format(lip, lv->lv_iovecp); - - /* copy data into existing array */ - ptr = lv->lv_buf; - for (index = 0; index < lv->lv_niovecs; index++) { - struct xfs_log_iovec *vec = &lv->lv_iovecp[index]; - - memcpy(ptr, vec->i_addr, vec->i_len); - vec->i_addr = ptr; - ptr += vec->i_len; - } - - /* - * some size calculations for log vectors over-estimate, so the caller - * doesn't know the amount of space actually used by the item. Return - * the byte count to the caller so they can check and store it - * appropriately. 
- */ - return ptr - lv->lv_buf; -} - /* * Prepare the log item for insertion into the CIL. Calculate the difference in * log space and vectors it will consume, and if it is a new item pin it as @@ -232,6 +202,13 @@ xlog_cil_insert_format_items( nbytes = 0; } + /* + * We 64-bit align the length of each iovec so that the start + * of the next one is naturally aligned. We'll need to + * account for that slack space here. + */ + nbytes += niovecs * sizeof(uint64_t); + /* grab the old item if it exists for reservation accounting */ old_lv = lip->li_lv; @@ -272,9 +249,9 @@ xlog_cil_insert_format_items( lv->lv_niovecs = niovecs; /* The allocated data region lies beyond the iovec region */ + lv->lv_buf_len = 0; lv->lv_buf = (char *)lv + buf_size - nbytes; - - lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv); + lip->li_ops->iop_format(lip, lv); insert: ASSERT(lv->lv_buf_len <= nbytes); xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 9b96d35e483d..b5bc1ab3c4da 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -64,7 +64,7 @@ typedef struct xfs_log_item { struct xfs_item_ops { void (*iop_size)(xfs_log_item_t *, int *, int *); - void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); + void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *); void (*iop_pin)(xfs_log_item_t *); void (*iop_unpin)(xfs_log_item_t *, int remove); uint (*iop_push)(struct xfs_log_item *, struct list_head *); From da7765031de15273d370d18a5354e1d8001ce2a9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Dec 2013 11:34:04 +1100 Subject: [PATCH 27/51] xfs: format logged extents directly into the CIL With the new iop_format scheme there is no need to have a temporary buffer to format logged extents into; we can do so directly into the CIL. This also allows us to remove the shortcut for big endian systems that probably hasn't gotten a lot of test coverage for a long time. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_inode_fork.c | 15 +++--- fs/xfs/xfs_inode_item.c | 111 ++++++---------------------------------- fs/xfs/xfs_inode_item.h | 4 -- 3 files changed, 25 insertions(+), 105 deletions(-) diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c index cfee14a83cfe..06abaeef1715 100644 --- a/fs/xfs/xfs_inode_fork.c +++ b/fs/xfs/xfs_inode_fork.c @@ -721,15 +721,16 @@ xfs_idestroy_fork( } /* - * xfs_iextents_copy() + * Convert in-core extents to on-disk form * - * This is called to copy the REAL extents (as opposed to the delayed - * allocation extents) from the inode into the given buffer. It - * returns the number of bytes copied into the buffer. + * For either the data or attr fork in extent format, we need to endian convert + * the in-core extent as we place them into the on-disk inode. * - * If there are no delayed allocation extents, then we can just - * memcpy() the extents into the buffer. Otherwise, we need to - * examine each extent in turn and skip those which are delayed. + * In the case of the data fork, the in-core and on-disk fork sizes can be + * different due to delayed allocation extents. We only copy on-disk extents + * here, so callers must always use the physical fork size to determine the + * size of the buffer passed to this routine. We will return the size actually + * used. */ int xfs_iextents_copy( diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 6ab318f80c96..45224d28049e 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -145,43 +145,6 @@ xfs_inode_item_size( xfs_inode_item_attr_fork_size(iip, nvecs, nbytes); } -/* - * xfs_inode_item_format_extents - convert in-core extents to on-disk form - * - * For either the data or attr fork in extent format, we need to endian convert - * the in-core extent as we place them into the on-disk inode. 
In this case, we - * need to do this conversion before we write the extents into the log. Because - * we don't have the disk inode to write into here, we allocate a buffer and - * format the extents into it via xfs_iextents_copy(). We free the buffer in - * the unlock routine after the copy for the log has been made. - * - * In the case of the data fork, the in-core and on-disk fork sizes can be - * different due to delayed allocation extents. We only log on-disk extents - * here, so always use the physical fork size to determine the size of the - * buffer we need to allocate. - */ -STATIC int -xfs_inode_item_format_extents( - struct xfs_inode *ip, - struct xfs_log_vec *lv, - struct xfs_log_iovec **vecp, - int whichfork, - int type) -{ - xfs_bmbt_rec_t *ext_buffer; - int len; - - ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP); - if (whichfork == XFS_DATA_FORK) - ip->i_itemp->ili_extents_buf = ext_buffer; - else - ip->i_itemp->ili_aextents_buf = ext_buffer; - - len = xfs_iextents_copy(ip, ext_buffer, whichfork); - xlog_copy_iovec(lv, vecp, type, ext_buffer, len); - return len; -} - /* * If this is a v1 format inode, then we need to log it as such. This means * that we have to copy the link count from the new field to the old. We @@ -229,30 +192,18 @@ xfs_inode_item_format_data_fork( if ((iip->ili_fields & XFS_ILOG_DEXT) && ip->i_d.di_nextents > 0 && ip->i_df.if_bytes > 0) { + struct xfs_bmbt_rec *p; + ASSERT(ip->i_df.if_u1.if_extents != NULL); ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); - ASSERT(iip->ili_extents_buf == NULL); -#ifdef XFS_NATIVE_HOST - if (ip->i_d.di_nextents == ip->i_df.if_bytes / - (uint)sizeof(xfs_bmbt_rec_t)) { - /* - * There are no delayed allocation - * extents, so just point to the - * real extents array. 
- */ - xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IEXT, - ip->i_df.if_u1.if_extents, - ip->i_df.if_bytes); - ilf->ilf_dsize = ip->i_df.if_bytes; - } else -#endif - { - ilf->ilf_dsize = - xfs_inode_item_format_extents(ip, lv, vecp, - XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); - ASSERT(iip->ili_format.ilf_dsize <= ip->i_df.if_bytes); - } + p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); + data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK); + xlog_finish_iovec(lv, *vecp, data_bytes); + + ASSERT(data_bytes <= ip->i_df.if_bytes); + + ilf->ilf_dsize = data_bytes; ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_DEXT; @@ -339,24 +290,17 @@ xfs_inode_item_format_attr_fork( if ((iip->ili_fields & XFS_ILOG_AEXT) && ip->i_d.di_anextents > 0 && ip->i_afp->if_bytes > 0) { + struct xfs_bmbt_rec *p; + ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) == ip->i_d.di_anextents); ASSERT(ip->i_afp->if_u1.if_extents != NULL); -#ifdef XFS_NATIVE_HOST - /* - * There are not delayed allocation extents - * for attributes, so just point at the array. - */ - xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT, - ip->i_afp->if_u1.if_extents, - ip->i_afp->if_bytes); - ilf->ilf_asize = ip->i_afp->if_bytes; -#else - ASSERT(iip->ili_aextents_buf == NULL); - ilf->ilf_asize = - xfs_inode_item_format_extents(ip, lv, vecp, - XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); -#endif + + p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT); + data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK); + xlog_finish_iovec(lv, *vecp, data_bytes); + + ilf->ilf_asize = data_bytes; ilf->ilf_size++; } else { iip->ili_fields &= ~XFS_ILOG_AEXT; @@ -571,27 +515,6 @@ xfs_inode_item_unlock( ASSERT(ip->i_itemp != NULL); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - /* - * If the inode needed a separate buffer with which to log - * its extents, then free it now. 
- */ - if (iip->ili_extents_buf != NULL) { - ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS); - ASSERT(ip->i_d.di_nextents > 0); - ASSERT(iip->ili_fields & XFS_ILOG_DEXT); - ASSERT(ip->i_df.if_bytes > 0); - kmem_free(iip->ili_extents_buf); - iip->ili_extents_buf = NULL; - } - if (iip->ili_aextents_buf != NULL) { - ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS); - ASSERT(ip->i_d.di_anextents > 0); - ASSERT(iip->ili_fields & XFS_ILOG_AEXT); - ASSERT(ip->i_afp->if_bytes > 0); - kmem_free(iip->ili_aextents_buf); - iip->ili_aextents_buf = NULL; - } - lock_flags = iip->ili_lock_flags; iip->ili_lock_flags = 0; if (lock_flags) diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index dce4d656768c..29b5f2b6533a 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -34,10 +34,6 @@ typedef struct xfs_inode_log_item { unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ unsigned int ili_fields; /* fields to be logged */ - struct xfs_bmbt_rec *ili_extents_buf; /* array of logged - data exts */ - struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged - attr exts */ xfs_inode_log_format_t ili_format; /* logged structure */ } xfs_inode_log_item_t; From 2f251293b09065118d78ae4e883e5639cc22f94e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Dec 2013 11:34:05 +1100 Subject: [PATCH 28/51] xfs: remove the inode log format from the inode log item No need to keep the inode log format around all the time, we can easily generate it at iop_format time. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_inode_item.c | 29 +++++++++++------------------ fs/xfs/xfs_inode_item.h | 1 - 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 45224d28049e..686889b4a1e5 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -370,17 +370,21 @@ xfs_inode_item_format( struct xfs_inode_log_format *ilf; struct xfs_log_iovec *vecp = NULL; - ilf = xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT, - &iip->ili_format, - sizeof(struct xfs_inode_log_format)); - ilf->ilf_size = 1; + ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT); + ilf->ilf_type = XFS_LI_INODE; + ilf->ilf_ino = ip->i_ino; + ilf->ilf_blkno = ip->i_imap.im_blkno; + ilf->ilf_len = ip->i_imap.im_len; + ilf->ilf_boffset = ip->i_imap.im_boffset; + ilf->ilf_fields = XFS_ILOG_CORE; + ilf->ilf_size = 2; /* format + core */ + xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format)); if (ip->i_d.di_version == 1) xfs_inode_item_format_v1_inode(ip); xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE, &ip->i_d, xfs_icdinode_size(ip->i_d.di_version)); - ilf->ilf_size++; xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); if (XFS_IFORK_Q(ip)) { @@ -390,14 +394,8 @@ xfs_inode_item_format( ~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); } - /* - * Now update the log format that goes out to disk from the in-core - * values. We always write the inode core to make the arithmetic - * games in recovery easier, which isn't a big deal as just about any - * transaction would dirty it anyway. 
- */ - iip->ili_format.ilf_fields = XFS_ILOG_CORE | - (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); + /* update the format with the exact fields we actually logged */ + ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); } /* @@ -601,11 +599,6 @@ xfs_inode_item_init( iip->ili_inode = ip; xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE, &xfs_inode_item_ops); - iip->ili_format.ilf_type = XFS_LI_INODE; - iip->ili_format.ilf_ino = ip->i_ino; - iip->ili_format.ilf_blkno = ip->i_imap.im_blkno; - iip->ili_format.ilf_len = ip->i_imap.im_len; - iip->ili_format.ilf_boffset = ip->i_imap.im_boffset; } /* diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 29b5f2b6533a..488d81254e28 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -34,7 +34,6 @@ typedef struct xfs_inode_log_item { unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ unsigned int ili_fields; /* fields to be logged */ - xfs_inode_log_format_t ili_format; /* logged structure */ } xfs_inode_log_item_t; static inline int xfs_inode_clean(xfs_inode_t *ip) From ce8e962939ca12218092f8eb3c8cfb196cd8cc51 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Dec 2013 11:34:07 +1100 Subject: [PATCH 29/51] xfs: remove the dquot log format from the dquot log item No need to keep the dquot log format around all the time, we can easily generate it at iop_format time. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_dquot_item.c | 25 +++++++++---------------- fs/xfs/xfs_dquot_item.h | 1 - 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 946d588070b0..d4fffa90036a 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -61,12 +61,17 @@ xfs_qm_dquot_logitem_format( { struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); struct xfs_log_iovec *vecp = NULL; + struct xfs_dq_logformat *qlf; - qlip->qli_format.qlf_size = 2; + qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QFORMAT); + qlf->qlf_type = XFS_LI_DQUOT; + qlf->qlf_size = 2; + qlf->qlf_id = be32_to_cpu(qlip->qli_dquot->q_core.d_id); + qlf->qlf_blkno = qlip->qli_dquot->q_blkno; + qlf->qlf_len = 1; + qlf->qlf_boffset = qlip->qli_dquot->q_bufoffset; + xlog_finish_iovec(lv, vecp, sizeof(struct xfs_dq_logformat)); - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_QFORMAT, - &qlip->qli_format, - sizeof(struct xfs_dq_logformat)); xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_DQUOT, &qlip->qli_dquot->q_core, sizeof(struct xfs_disk_dquot)); @@ -256,18 +261,6 @@ xfs_qm_dquot_logitem_init( xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, &xfs_dquot_item_ops); lp->qli_dquot = dqp; - lp->qli_format.qlf_type = XFS_LI_DQUOT; - lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); - lp->qli_format.qlf_blkno = dqp->q_blkno; - lp->qli_format.qlf_len = 1; - /* - * This is just the offset of this dquot within its buffer - * (which is currently 1 FSB and probably won't change). - * Hence 32 bits for this offset should be just fine. - * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t)) - * here, and recompute it at recovery time. 
- */ - lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset; } /*------------------ QUOTAOFF LOG ITEMS -------------------*/ diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index 5acae2ada70b..925cbe948c67 100644 --- a/fs/xfs/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h @@ -27,7 +27,6 @@ typedef struct xfs_dq_logitem { xfs_log_item_t qli_item; /* common portion */ struct xfs_dquot *qli_dquot; /* dquot ptr */ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ - xfs_dq_logformat_t qli_format; /* logged structure */ } xfs_dq_logitem_t; typedef struct xfs_qoff_logitem { From ffda4e83aa107ff55345dc583efdb24fca486fb5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 13 Dec 2013 11:34:08 +1100 Subject: [PATCH 30/51] xfs: remove the quotaoff log format from the quotaoff log item This one doesn't save a whole lot of memory, but still makes the code simpler. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_dquot_item.c | 22 +++++++--------------- fs/xfs/xfs_dquot_item.h | 2 +- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index d4fffa90036a..f33fbaaa4d8a 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -286,13 +286,6 @@ xfs_qm_qoff_logitem_size( *nbytes += sizeof(struct xfs_qoff_logitem); } -/* - * This is called to fill in the vector of log iovecs for the - * given quotaoff log item. We use only 1 iovec, and we point that - * at the quotaoff_log_format structure embedded in the quotaoff item. - * It is at this point that we assert that all of the extent - * slots in the quotaoff item have been filled. 
- */ STATIC void xfs_qm_qoff_logitem_format( struct xfs_log_item *lip, @@ -300,13 +293,13 @@ xfs_qm_qoff_logitem_format( { struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); struct xfs_log_iovec *vecp = NULL; + struct xfs_qoff_logformat *qlf; - ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF); - qflip->qql_format.qf_size = 1; - - xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_QUOTAOFF, - &qflip->qql_format, - sizeof(struct xfs_qoff_logitem)); + qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QUOTAOFF); + qlf->qf_type = XFS_LI_QUOTAOFF; + qlf->qf_size = 1; + qlf->qf_flags = qflip->qql_flags; + xlog_finish_iovec(lv, vecp, sizeof(struct xfs_qoff_logitem)); } /* @@ -446,8 +439,7 @@ xfs_qm_qoff_logitem_init( xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); qf->qql_item.li_mountp = mp; - qf->qql_format.qf_type = XFS_LI_QUOTAOFF; - qf->qql_format.qf_flags = flags; qf->qql_start_lip = start; + qf->qql_flags = flags; return qf; } diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index 925cbe948c67..502e9464634a 100644 --- a/fs/xfs/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h @@ -32,7 +32,7 @@ typedef struct xfs_dq_logitem { typedef struct xfs_qoff_logitem { xfs_log_item_t qql_item; /* common portion */ struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ - xfs_qoff_logformat_t qql_format; /* logged structure */ + unsigned int qql_flags; } xfs_qoff_logitem_t; From 717834383c6ad2173323b823b97c521c9fb8fbbb Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Fri, 13 Dec 2013 15:51:46 +1100 Subject: [PATCH 31/51] xfs: get rid of XFS_IALLOC_INODES macros Get rid of XFS_IALLOC_INODES() macros, use mp->m_ialloc_inos directly.
Signed-off-by: Jie Liu Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_ialloc.c | 12 ++++++------ fs/xfs/xfs_ialloc.h | 1 - fs/xfs/xfs_log_recover.c | 4 ++-- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 7a728f9fc0be..2f87a53f74fe 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -225,7 +225,7 @@ xfs_ialloc_inode_init( * they track in the AIL as if they were physically logged. */ if (tp) - xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp), + xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos, mp->m_sb.sb_inodesize, length, gen); } else if (xfs_sb_version_hasnlink(&mp->m_sb)) version = 2; @@ -329,7 +329,7 @@ xfs_ialloc_ag_alloc( * Locking will ensure that we don't have two callers in here * at one time. */ - newlen = XFS_IALLOC_INODES(args.mp); + newlen = args.mp->m_ialloc_inos; if (args.mp->m_maxicount && args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) return XFS_ERROR(ENOSPC); @@ -999,7 +999,7 @@ xfs_dialloc( * inode. */ if (mp->m_maxicount && - mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { + mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) { noroom = 1; okalloc = 0; } @@ -1202,7 +1202,7 @@ xfs_difree( * When an inode cluster is free, it becomes eligible for removal */ if (!(mp->m_flags & XFS_MOUNT_IKEEP) && - (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { + (rec.ir_freecount == mp->m_ialloc_inos)) { *delete = 1; *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino); @@ -1212,7 +1212,7 @@ xfs_difree( * AGI and Superblock inode counts, and mark the disk space * to be freed when the transaction is committed. 
*/ - ilen = XFS_IALLOC_INODES(mp); + ilen = mp->m_ialloc_inos; be32_add_cpu(&agi->agi_count, -ilen); be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); @@ -1311,7 +1311,7 @@ xfs_imap_lookup( /* check that the returned record contains the required inode */ if (rec.ir_startino > agino || - rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) + rec.ir_startino + mp->m_ialloc_inos <= agino) return EINVAL; /* for untrusted inodes check it is allocated first */ diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index a8f76a5ff418..4689b025f79f 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -28,7 +28,6 @@ struct xfs_btree_cur; /* * Allocation parameters for inode allocation. */ -#define XFS_IALLOC_INODES(mp) (mp)->m_ialloc_inos #define XFS_IALLOC_BLOCKS(mp) (mp)->m_ialloc_blks /* diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 07ab52ca8aba..f76de2eadb6d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3208,9 +3208,9 @@ xlog_recover_do_icreate_pass2( } /* existing allocation is fixed value */ - ASSERT(count == XFS_IALLOC_INODES(mp)); + ASSERT(count == mp->m_ialloc_inos); ASSERT(length == XFS_IALLOC_BLOCKS(mp)); - if (count != XFS_IALLOC_INODES(mp) || + if (count != mp->m_ialloc_inos || length != XFS_IALLOC_BLOCKS(mp)) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); return EINVAL; From 0f49efd805229fc747761213ec820c1ba3ab64db Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Fri, 13 Dec 2013 15:51:48 +1100 Subject: [PATCH 32/51] xfs: get rid of XFS_INODE_CLUSTER_SIZE macros Get rid of XFS_INODE_CLUSTER_SIZE() macros, use mp->m_inode_cluster_size directly. 
Signed-off-by: Jie Liu Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_ialloc.c | 10 +++++----- fs/xfs/xfs_ialloc.h | 5 +---- fs/xfs/xfs_inode.c | 10 +++++----- fs/xfs/xfs_itable.c | 4 ++-- fs/xfs/xfs_log_recover.c | 8 ++++---- fs/xfs/xfs_trans_resv.c | 2 +- 6 files changed, 18 insertions(+), 21 deletions(-) diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 2f87a53f74fe..3ac5eb6bf856 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -52,7 +52,7 @@ xfs_ialloc_cluster_alignment( { if (xfs_sb_version_hasalign(&args->mp->m_sb) && args->mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) + XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size)) return args->mp->m_sb.sb_inoalignmt; return 1; } @@ -181,12 +181,12 @@ xfs_ialloc_inode_init( * For small block sizes, manipulate the inodes in buffers * which are multiples of the blocks size. */ - if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { + if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) { blks_per_cluster = 1; nbufs = length; ninodes = mp->m_sb.sb_inopblock; } else { - blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / + blks_per_cluster = mp->m_inode_cluster_size / mp->m_sb.sb_blocksize; nbufs = length / blks_per_cluster; ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; @@ -1384,7 +1384,7 @@ xfs_imap( return XFS_ERROR(EINVAL); } - blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; + blks_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog; /* * For bulkstat and handle lookups, we have an untrusted inode number @@ -1405,7 +1405,7 @@ xfs_imap( * If the inode cluster size is the same as the blocksize or * smaller we get to the buffer by simple arithmetics. 
*/ - if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) { + if (mp->m_inode_cluster_size <= mp->m_sb.sb_blocksize) { offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 4689b025f79f..98ce63e35bd4 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -30,11 +30,8 @@ struct xfs_btree_cur; */ #define XFS_IALLOC_BLOCKS(mp) (mp)->m_ialloc_blks -/* - * Move inodes in clusters of this size. - */ +/* Move inodes in clusters of this size */ #define XFS_INODE_BIG_CLUSTER_SIZE 8192 -#define XFS_INODE_CLUSTER_SIZE(mp) (mp)->m_inode_cluster_size /* * Make an inode pointer out of the buffer/offset. diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 001aa893ed59..c1642c9a7e66 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2152,13 +2152,13 @@ xfs_ifree_cluster( struct xfs_perag *pag; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); - if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { + if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) { blks_per_cluster = 1; ninodes = mp->m_sb.sb_inopblock; nbufs = XFS_IALLOC_BLOCKS(mp); } else { - blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / - mp->m_sb.sb_blocksize; + blks_per_cluster = mp->m_inode_cluster_size / + mp->m_sb.sb_blocksize; ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; } @@ -2906,13 +2906,13 @@ xfs_iflush_cluster( pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; + inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); if (!ilist) goto out_put; - mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); + mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1); first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 
rcu_read_lock(); /* really need a gang lookup range call here */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index c237ad15d500..0571012f67c0 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -243,9 +243,9 @@ xfs_bulkstat( *done = 0; fmterror = 0; ubufp = ubuffer; - nicluster = mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp) ? + nicluster = mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size ? mp->m_sb.sb_inopblock : - (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); + (mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog); nimask = ~(nicluster - 1); nbcluster = nicluster >> mp->m_sb.sb_inopblog; irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index f76de2eadb6d..42458ab7a336 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2523,19 +2523,19 @@ xlog_recover_buffer_pass2( * * Also make sure that only inode buffers with good sizes stay in * the buffer cache. The kernel moves inodes in buffers of 1 block - * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger. The inode + * or mp->m_inode_cluster_size bytes, whichever is bigger. The inode * buffers in the log can be a different size if the log was generated * by an older kernel using unclustered inode buffers or a newer kernel * running with a different inode cluster size. Regardless, if the - * the inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE) - * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep + * the inode buffer size isn't MAX(blocksize, mp->m_inode_cluster_size) + * for *our* value of mp->m_inode_cluster_size, then we need to keep * the buffer out of the buffer cache so that the buffer won't * overlap with future reads of those inodes. 
*/ if (XFS_DINODE_MAGIC == be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize, - (__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { + (__uint32_t)log->l_mp->m_inode_cluster_size))) { xfs_buf_stale(bp); error = xfs_bwrite(bp); } else { diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index 2fd59c0dae66..51c181dde119 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c @@ -385,7 +385,7 @@ xfs_calc_ifree_reservation( xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + - max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) + + max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size) + xfs_calc_buf_res(1, 0) + xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels, 0) + From 126cd105d4408ff52437a72d681aecdb29cc80e8 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Fri, 13 Dec 2013 15:51:48 +1100 Subject: [PATCH 33/51] xfs: get rid of XFS_IALLOC_BLOCKS macros Get rid of XFS_IALLOC_BLOCKS() macros, use mp->m_ialloc_blks directly. Signed-off-by: Jie Liu Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_ialloc.c | 12 ++++++------ fs/xfs/xfs_ialloc.h | 5 ----- fs/xfs/xfs_inode.c | 4 ++-- fs/xfs/xfs_log_recover.c | 4 ++-- fs/xfs/xfs_trans_resv.c | 8 ++++---- fs/xfs/xfs_trans_space.h | 2 +- 6 files changed, 15 insertions(+), 20 deletions(-) diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 3ac5eb6bf856..eacc59c7cb80 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -333,7 +333,7 @@ xfs_ialloc_ag_alloc( if (args.mp->m_maxicount && args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) return XFS_ERROR(ENOSPC); - args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); + args.minlen = args.maxlen = args.mp->m_ialloc_blks; /* * First try to allocate inodes contiguous with the last-allocated * chunk of inodes.
If the filesystem is striped, this will fill @@ -343,7 +343,7 @@ xfs_ialloc_ag_alloc( newino = be32_to_cpu(agi->agi_newino); agno = be32_to_cpu(agi->agi_seqno); args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + - XFS_IALLOC_BLOCKS(args.mp); + args.mp->m_ialloc_blks; if (likely(newino != NULLAGINO && (args.agbno < be32_to_cpu(agi->agi_length)))) { args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); @@ -585,7 +585,7 @@ xfs_ialloc_ag_select( * Is there enough free space for the file plus a block of * inodes? (if we need to allocate some)? */ - ineed = XFS_IALLOC_BLOCKS(mp); + ineed = mp->m_ialloc_blks; longest = pag->pagf_longest; if (!longest) longest = pag->pagf_flcount > 0; @@ -1228,9 +1228,9 @@ xfs_difree( goto error0; } - xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, - agno, XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)), - XFS_IALLOC_BLOCKS(mp), flist, mp); + xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, + XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)), + mp->m_ialloc_blks, flist, mp); } else { *delete = 0; diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 98ce63e35bd4..d86f8bdc963e 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -25,11 +25,6 @@ struct xfs_mount; struct xfs_trans; struct xfs_btree_cur; -/* - * Allocation parameters for inode allocation. 
- */ -#define XFS_IALLOC_BLOCKS(mp) (mp)->m_ialloc_blks - /* Move inodes in clusters of this size */ #define XFS_INODE_BIG_CLUSTER_SIZE 8192 diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c1642c9a7e66..0c8c334f0f15 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2155,12 +2155,12 @@ xfs_ifree_cluster( if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) { blks_per_cluster = 1; ninodes = mp->m_sb.sb_inopblock; - nbufs = XFS_IALLOC_BLOCKS(mp); + nbufs = mp->m_ialloc_blks; } else { blks_per_cluster = mp->m_inode_cluster_size / mp->m_sb.sb_blocksize; ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; - nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; + nbufs = mp->m_ialloc_blks / blks_per_cluster; } for (j = 0; j < nbufs; j++, inum += ninodes) { diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 42458ab7a336..22b6f35765c1 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3209,9 +3209,9 @@ xlog_recover_do_icreate_pass2( /* existing allocation is fixed value */ ASSERT(count == mp->m_ialloc_inos); - ASSERT(length == XFS_IALLOC_BLOCKS(mp)); + ASSERT(length == mp->m_ialloc_blks); if (count != mp->m_ialloc_inos || - length != XFS_IALLOC_BLOCKS(mp)) { + length != mp->m_ialloc_blks) { xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); return EINVAL; } diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index 51c181dde119..2ffd3e331b49 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c @@ -174,7 +174,7 @@ xfs_calc_itruncate_reservation( xfs_calc_buf_res(5, 0) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + + xfs_calc_buf_res(2 + mp->m_ialloc_blks + mp->m_in_maxlevels, 0))); } @@ -282,7 +282,7 @@ xfs_calc_create_resv_modify( * For create we can allocate some inodes giving: * the agi and agf of the ag getting the new inodes: 2 * sectorsize * the superblock for the nlink flag: sector size - * the 
inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize + * the inode blocks allocated: mp->m_ialloc_blks * blocksize * the inode btree: max depth * blocksize * the allocation btrees: 2 trees * (max depth - 1) * block size */ @@ -292,7 +292,7 @@ xfs_calc_create_resv_alloc( { return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + mp->m_sb.sb_sectsize + - xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) + xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), XFS_FSB_TO_B(mp, 1)); @@ -387,7 +387,7 @@ xfs_calc_ifree_reservation( xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size) + xfs_calc_buf_res(1, 0) + - xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + + xfs_calc_buf_res(2 + mp->m_ialloc_blks + mp->m_in_maxlevels, 0) + xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), XFS_FSB_TO_B(mp, 1)); diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h index 7d2c920dfb9c..af5dbe06cb65 100644 --- a/fs/xfs/xfs_trans_space.h +++ b/fs/xfs/xfs_trans_space.h @@ -47,7 +47,7 @@ #define XFS_DIRREMOVE_SPACE_RES(mp) \ XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) #define XFS_IALLOC_SPACE_RES(mp) \ - (XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels - 1) + ((mp)->m_ialloc_blks + (mp)->m_in_maxlevels - 1) /* * Space reservation values for various transactions. From 904957b75033149509dc0fecc0af34348f87c78c Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Fri, 13 Dec 2013 15:51:48 +1100 Subject: [PATCH 34/51] xfs: introduce a common helper xfs_icluster_size_fsb Introduce a common routine xfs_icluster_size_fsb() to calculate and return the number of file system blocks per inode cluster. 
Signed-off-by: Jie Liu Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_ialloc.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index d86f8bdc963e..812365d17e67 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -28,6 +28,16 @@ struct xfs_btree_cur; /* Move inodes in clusters of this size */ #define XFS_INODE_BIG_CLUSTER_SIZE 8192 +/* Calculate and return the number of filesystem blocks per inode cluster */ +static inline int +xfs_icluster_size_fsb( + struct xfs_mount *mp) +{ + if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) + return 1; + return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog; +} + /* * Make an inode pointer out of the buffer/offset. */ From a2ba07b2d2215bed560aa67c84275304314d9691 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Fri, 13 Dec 2013 15:51:48 +1100 Subject: [PATCH 35/51] xfs: use xfs_icluster_size_fsb in xfs_bulkstat Use xfs_icluster_size_fsb() in xfs_bulkstat(), make the related variables more meaningful and remove an unused variable nimask from it. 
Signed-off-by: Jie Liu Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_itable.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 0571012f67c0..f46338285152 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -209,9 +209,8 @@ xfs_bulkstat( xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ xfs_ino_t lastino; /* last inode number returned */ - int nbcluster; /* # of blocks in a cluster */ - int nicluster; /* # of inodes in a cluster */ - int nimask; /* mask for inode clusters */ + int blks_per_cluster; /* # of blocks per cluster */ + int inodes_per_cluster;/* # of inodes per cluster */ int nirbuf; /* size of irbuf */ int rval; /* return value error code */ int tmp; /* result value from btree calls */ @@ -243,11 +242,8 @@ xfs_bulkstat( *done = 0; fmterror = 0; ubufp = ubuffer; - nicluster = mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size ? 
- mp->m_sb.sb_inopblock : - (mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog); - nimask = ~(nicluster - 1); - nbcluster = nicluster >> mp->m_sb.sb_inopblog; + blks_per_cluster = xfs_icluster_size_fsb(mp); + inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); if (!irbuf) return ENOMEM; @@ -390,12 +386,12 @@ xfs_bulkstat( agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino); for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; - chunkidx += nicluster, - agbno += nbcluster) { - if (xfs_inobt_maskn(chunkidx, nicluster) - & ~r.ir_free) + chunkidx += inodes_per_cluster, + agbno += blks_per_cluster) { + if (xfs_inobt_maskn(chunkidx, + inodes_per_cluster) & ~r.ir_free) xfs_btree_reada_bufs(mp, agno, - agbno, nbcluster, + agbno, blks_per_cluster, &xfs_inode_buf_ops); } blk_finish_plug(&plug); From 6e0c7b8c3ea62d684af267d34fc015253e7cd6e5 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Fri, 13 Dec 2013 15:51:49 +1100 Subject: [PATCH 36/51] xfs: use xfs_icluster_size_fsb in xfs_ialloc_inode_init Use xfs_icluster_size_fsb() in xfs_ialloc_inode_init(), rename variable ninodes to inodes_per_cluster, the latter is more meaningful. Signed-off-by: Jie Liu Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_ialloc.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index eacc59c7cb80..6ee592cfc0db 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -170,27 +170,20 @@ xfs_ialloc_inode_init( { struct xfs_buf *fbuf; struct xfs_dinode *free; - int blks_per_cluster, nbufs, ninodes; + int nbufs, blks_per_cluster, inodes_per_cluster; int version; int i, j; xfs_daddr_t d; xfs_ino_t ino = 0; /* - * Loop over the new block(s), filling in the inodes. - * For small block sizes, manipulate the inodes in buffers - * which are multiples of the blocks size. + * Loop over the new block(s), filling in the inodes. 
For small block + * sizes, manipulate the inodes in buffers which are multiples of the + * blocks size. */ - if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) { - blks_per_cluster = 1; - nbufs = length; - ninodes = mp->m_sb.sb_inopblock; - } else { - blks_per_cluster = mp->m_inode_cluster_size / - mp->m_sb.sb_blocksize; - nbufs = length / blks_per_cluster; - ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; - } + blks_per_cluster = xfs_icluster_size_fsb(mp); + inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; + nbufs = length / blks_per_cluster; /* * Figure out what version number to use in the inodes we create. If @@ -246,7 +239,7 @@ xfs_ialloc_inode_init( /* Initialize the inode buffers and log them appropriately. */ fbuf->b_ops = &xfs_inode_buf_ops; xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); - for (i = 0; i < ninodes; i++) { + for (i = 0; i < inodes_per_cluster; i++) { int ioffset = i << mp->m_sb.sb_inodelog; uint isize = xfs_dinode_size(version); From 982e939e4ddc8c00cb478fb4d725d0e3d18971cc Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Fri, 13 Dec 2013 15:51:49 +1100 Subject: [PATCH 37/51] xfs: use xfs_icluster_size_fsb in xfs_ifree_cluster Use xfs_icluster_size_fsb() in xfs_ifree_cluster(), rename variable ninodes to inodes_per_cluster, the latter is more meaningful. 
Signed-off-by: Jie Liu Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_inode.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 0c8c334f0f15..833028cf205f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2141,8 +2141,8 @@ xfs_ifree_cluster( { xfs_mount_t *mp = free_ip->i_mount; int blks_per_cluster; + int inodes_per_cluster; int nbufs; - int ninodes; int i, j; xfs_daddr_t blkno; xfs_buf_t *bp; @@ -2152,18 +2152,11 @@ xfs_ifree_cluster( struct xfs_perag *pag; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); - if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) { - blks_per_cluster = 1; - ninodes = mp->m_sb.sb_inopblock; - nbufs = mp->m_ialloc_blks; - } else { - blks_per_cluster = mp->m_inode_cluster_size / - mp->m_sb.sb_blocksize; - ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; - nbufs = mp->m_ialloc_blks / blks_per_cluster; - } + blks_per_cluster = xfs_icluster_size_fsb(mp); + inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; + nbufs = mp->m_ialloc_blks / blks_per_cluster; - for (j = 0; j < nbufs; j++, inum += ninodes) { + for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) { blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), XFS_INO_TO_AGBNO(mp, inum)); @@ -2225,7 +2218,7 @@ xfs_ifree_cluster( * transaction stale above, which means there is no point in * even trying to lock them. */ - for (i = 0; i < ninodes; i++) { + for (i = 0; i < inodes_per_cluster; i++) { retry: rcu_read_lock(); ip = radix_tree_lookup(&pag->pag_ici_root, From f9e5abcfc5b299a988cf8f9d0ad11e03da14806b Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Fri, 13 Dec 2013 15:51:49 +1100 Subject: [PATCH 38/51] xfs: use xfs_icluster_size_fsb in xfs_imap Use xfs_icluster_size_fsb() in xfs_imap(). 
Signed-off-by: Jie Liu Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_ialloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 6ee592cfc0db..5d7f105a1c82 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1377,7 +1377,7 @@ xfs_imap( return XFS_ERROR(EINVAL); } - blks_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog; + blks_per_cluster = xfs_icluster_size_fsb(mp); /* * For bulkstat and handle lookups, we have an untrusted inode number @@ -1398,7 +1398,7 @@ xfs_imap( * If the inode cluster size is the same as the blocksize or * smaller we get to the buffer by simple arithmetics. */ - if (mp->m_inode_cluster_size <= mp->m_sb.sb_blocksize) { + if (blks_per_cluster == 1) { offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); From 30ba7ad54335e4715d3cc9cc8f43cbf1b3535e46 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Dec 2013 12:30:07 -0800 Subject: [PATCH 39/51] xfs: no need to lock the inode in xfs_find_handle Both the inode number and the generation do not change on a live inode. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_ioctl.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 33ad9a77791f..518aa56b8f2e 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -112,15 +112,11 @@ xfs_find_handle( memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); hsize = sizeof(xfs_fsid_t); } else { - int lock_mode; - - lock_mode = xfs_ilock_map_shared(ip); handle.ha_fid.fid_len = sizeof(xfs_fid_t) - sizeof(handle.ha_fid.fid_len); handle.ha_fid.fid_pad = 0; handle.ha_fid.fid_gen = ip->i_d.di_gen; handle.ha_fid.fid_ino = ip->i_ino; - xfs_iunlock_map_shared(ip, lock_mode); hsize = XFS_HSIZE(handle); } From 01f4f3277556d4f4f833371db0219b0ca11c5409 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Dec 2013 12:30:08 -0800 Subject: [PATCH 40/51] xfs: remove xfs_iunlock_map_shared We can just use xfs_iunlock without any loss of clarity. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_bmap_util.c | 2 +- fs/xfs/xfs_file.c | 4 ++-- fs/xfs/xfs_inode.c | 17 ++--------------- fs/xfs/xfs_inode.h | 1 - 4 files changed, 5 insertions(+), 19 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 5887e41c0323..2f32d7ee1411 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -737,7 +737,7 @@ xfs_getbmap( out_free_map: kmem_free(map); out_unlock_ilock: - xfs_iunlock_map_shared(ip, lock); + xfs_iunlock(ip, lock); out_unlock_iolock: xfs_iunlock(ip, XFS_IOLOCK_SHARED); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 52c91e143725..349bfa28aa3d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1294,7 +1294,7 @@ out: offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: - xfs_iunlock_map_shared(ip, lock); + xfs_iunlock(ip, lock); if (error) return -error; @@ -1402,7 +1402,7 @@ out: offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); 
out_unlock: - xfs_iunlock_map_shared(ip, lock); + xfs_iunlock(ip, lock); if (error) return -error; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 001aa893ed59..967f90625eae 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -88,8 +88,7 @@ xfs_get_extsz_hint( * have been read in yet, and only lock the inode exclusively if they have not. * * The function returns a value which should be given to the corresponding - * xfs_iunlock_map_shared(). This value is the mode in which the lock was - * actually taken. + * xfs_iunlock() call. */ uint xfs_ilock_map_shared( @@ -109,18 +108,6 @@ xfs_ilock_map_shared( return lock_mode; } -/* - * This is simply the unlock routine to go with xfs_ilock_map_shared(). - * All it does is call xfs_iunlock() with the given lock_mode. - */ -void -xfs_iunlock_map_shared( - xfs_inode_t *ip, - unsigned int lock_mode) -{ - xfs_iunlock(ip, lock_mode); -} - /* * The xfs inode contains 2 locks: a multi-reader lock called the * i_iolock and a multi-reader lock called the i_lock. 
This routine @@ -590,7 +577,7 @@ xfs_lookup( lock_mode = xfs_ilock_map_shared(dp); error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); - xfs_iunlock_map_shared(dp, lock_mode); + xfs_iunlock(dp, lock_mode); if (error) goto out; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 9e6efccbae04..5e2bd17cf2be 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -338,7 +338,6 @@ void xfs_iunlock(xfs_inode_t *, uint); void xfs_ilock_demote(xfs_inode_t *, uint); int xfs_isilocked(xfs_inode_t *, uint); uint xfs_ilock_map_shared(xfs_inode_t *); -void xfs_iunlock_map_shared(xfs_inode_t *, uint); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t, xfs_nlink_t, xfs_dev_t, prid_t, int, struct xfs_buf **, xfs_inode_t **); From 309ecac8e7c937c5811ef8f0efc14b3d1bd18775 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Dec 2013 12:30:09 -0800 Subject: [PATCH 41/51] xfs: rename xfs_ilock_map_shared Make it clear that we're only locking against the extent map on the data fork. Also clean the function up a little bit. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_aops.c | 2 +- fs/xfs/xfs_bmap_util.c | 2 +- fs/xfs/xfs_file.c | 6 +++--- fs/xfs/xfs_inode.c | 17 ++++++----------- fs/xfs/xfs_inode.h | 2 +- 5 files changed, 12 insertions(+), 17 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 71c8c9d2b882..a26739451b53 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1217,7 +1217,7 @@ __xfs_get_blocks( lockmode = XFS_ILOCK_EXCL; xfs_ilock(ip, lockmode); } else { - lockmode = xfs_ilock_map_shared(ip); + lockmode = xfs_ilock_data_map_shared(ip); } ASSERT(offset <= mp->m_super->s_maxbytes); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2f32d7ee1411..460aeb87c04e 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -632,7 +632,7 @@ xfs_getbmap( */ } - lock = xfs_ilock_map_shared(ip); + lock = xfs_ilock_data_map_shared(ip); /* * Don't let nex be bigger than the number of extents diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 349bfa28aa3d..e00121592632 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -912,7 +912,7 @@ xfs_dir_open( * If there are any blocks, read-ahead block 0 as we're almost * certain to have the next operation be a read there. 
*/ - mode = xfs_ilock_map_shared(ip); + mode = xfs_ilock_data_map_shared(ip); if (ip->i_d.di_nextents > 0) xfs_dir3_data_readahead(NULL, ip, 0, -1); xfs_iunlock(ip, mode); @@ -1215,7 +1215,7 @@ xfs_seek_data( uint lock; int error; - lock = xfs_ilock_map_shared(ip); + lock = xfs_ilock_data_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { @@ -1319,7 +1319,7 @@ xfs_seek_hole( if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); - lock = xfs_ilock_map_shared(ip); + lock = xfs_ilock_data_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 967f90625eae..fdd483783365 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -91,20 +91,15 @@ xfs_get_extsz_hint( * xfs_iunlock() call. */ uint -xfs_ilock_map_shared( - xfs_inode_t *ip) +xfs_ilock_data_map_shared( + struct xfs_inode *ip) { - uint lock_mode; + uint lock_mode = XFS_ILOCK_SHARED; - if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) && - ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) { + if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && + (ip->i_df.if_flags & XFS_IFEXTENTS) == 0) lock_mode = XFS_ILOCK_EXCL; - } else { - lock_mode = XFS_ILOCK_SHARED; - } - xfs_ilock(ip, lock_mode); - return lock_mode; } @@ -575,7 +570,7 @@ xfs_lookup( if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return XFS_ERROR(EIO); - lock_mode = xfs_ilock_map_shared(dp); + lock_mode = xfs_ilock_data_map_shared(dp); error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); xfs_iunlock(dp, lock_mode); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 5e2bd17cf2be..fde368624ea7 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -337,7 +337,7 @@ int xfs_ilock_nowait(xfs_inode_t *, uint); void xfs_iunlock(xfs_inode_t *, uint); void xfs_ilock_demote(xfs_inode_t *, uint); int xfs_isilocked(xfs_inode_t *, uint); -uint xfs_ilock_map_shared(xfs_inode_t *); +uint xfs_ilock_data_map_shared(struct xfs_inode *); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, 
umode_t, xfs_nlink_t, xfs_dev_t, prid_t, int, struct xfs_buf **, xfs_inode_t **); From efa70be165497826f674846f681e6e2364af906c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Dec 2013 02:14:39 -0800 Subject: [PATCH 42/51] xfs: add xfs_ilock_attr_map_shared Equivalent to xfs_ilock_data_map_shared, except for the attribute fork. Make xfs_getbmap use it if called for the attribute fork instead of xfs_ilock_data_map_shared. Signed-off-by: Christoph Hellwig Reviewed-by: Ben Myers Signed-off-by: Ben Myers --- fs/xfs/xfs_bmap_util.c | 29 +++++++++++++++++------------ fs/xfs/xfs_inode.c | 34 ++++++++++++++++++++++++---------- fs/xfs/xfs_inode.h | 1 + 3 files changed, 42 insertions(+), 22 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 460aeb87c04e..374ba050942f 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -617,22 +617,27 @@ xfs_getbmap( return XFS_ERROR(ENOMEM); xfs_ilock(ip, XFS_IOLOCK_SHARED); - if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { - if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) { + if (whichfork == XFS_DATA_FORK) { + if (!(iflags & BMV_IF_DELALLOC) && + (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) { error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); if (error) goto out_unlock_iolock; - } - /* - * even after flushing the inode, there can still be delalloc - * blocks on the inode beyond EOF due to speculative - * preallocation. These are not removed until the release - * function is called or the inode is inactivated. Hence we - * cannot assert here that ip->i_delayed_blks == 0. - */ - } - lock = xfs_ilock_data_map_shared(ip); + /* + * Even after flushing the inode, there can still be + * delalloc blocks on the inode beyond EOF due to + * speculative preallocation. These are not removed + * until the release function is called or the inode + * is inactivated. Hence we cannot assert here that + * ip->i_delayed_blks == 0. 
+ */ + } + + lock = xfs_ilock_data_map_shared(ip); + } else { + lock = xfs_ilock_attr_map_shared(ip); + } /* * Don't let nex be bigger than the number of extents diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index fdd483783365..e655bb07e8bb 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -77,17 +77,18 @@ xfs_get_extsz_hint( } /* - * This is a wrapper routine around the xfs_ilock() routine used to centralize - * some grungy code. It is used in places that wish to lock the inode solely - * for reading the extents. The reason these places can't just call - * xfs_ilock(SHARED) is that the inode lock also guards to bringing in of the - * extents from disk for a file in b-tree format. If the inode is in b-tree - * format, then we need to lock the inode exclusively until the extents are read - * in. Locking it exclusively all the time would limit our parallelism - * unnecessarily, though. What we do instead is check to see if the extents - * have been read in yet, and only lock the inode exclusively if they have not. + * These two are wrapper routines around the xfs_ilock() routine used to + * centralize some grungy code. They are used in places that wish to lock the + * inode solely for reading the extents. The reason these places can't just + * call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards to + * bringing in of the extents from disk for a file in b-tree format. If the + * inode is in b-tree format, then we need to lock the inode exclusively until + * the extents are read in. Locking it exclusively all the time would limit + * our parallelism unnecessarily, though. What we do instead is check to see + * if the extents have been read in yet, and only lock the inode exclusively + * if they have not. * - * The function returns a value which should be given to the corresponding + * The functions return a value which should be given to the corresponding * xfs_iunlock() call. 
*/ uint @@ -103,6 +104,19 @@ xfs_ilock_data_map_shared( return lock_mode; } +uint +xfs_ilock_attr_map_shared( + struct xfs_inode *ip) +{ + uint lock_mode = XFS_ILOCK_SHARED; + + if (ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE && + (ip->i_afp->if_flags & XFS_IFEXTENTS) == 0) + lock_mode = XFS_ILOCK_EXCL; + xfs_ilock(ip, lock_mode); + return lock_mode; +} + /* * The xfs inode contains 2 locks: a multi-reader lock called the * i_iolock and a multi-reader lock called the i_lock. This routine diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index fde368624ea7..65e2350f449c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -338,6 +338,7 @@ void xfs_iunlock(xfs_inode_t *, uint); void xfs_ilock_demote(xfs_inode_t *, uint); int xfs_isilocked(xfs_inode_t *, uint); uint xfs_ilock_data_map_shared(struct xfs_inode *); +uint xfs_ilock_attr_map_shared(struct xfs_inode *); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t, xfs_nlink_t, xfs_dev_t, prid_t, int, struct xfs_buf **, xfs_inode_t **); From 40194ecc6d78327d98e66de3213db96ca0a31e6f Mon Sep 17 00:00:00 2001 From: Ben Myers Date: Fri, 6 Dec 2013 12:30:11 -0800 Subject: [PATCH 43/51] xfs: reinstate the ilock in xfs_readdir Although it was removed in commit 051e7cd44ab8, ilock needs to be taken in xfs_readdir because we might have to read the extent list in from disk. This keeps other threads from reading from or writing to the extent list while it is being read in and is still in a transitional state. This has been associated with "Access to block zero" messages on directories with large numbers of extents resulting from excessive filesystem fragmentation, as well as extent list corruption. Unfortunately no test case at this point. 
Signed-off-by: Ben Myers Reviewed-by: Dave Chinner --- fs/xfs/xfs_dir2_readdir.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index c4e50c6ed584..aead369e1c30 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -674,6 +674,7 @@ xfs_readdir( { int rval; /* return value */ int v; /* type-checking value */ + uint lock_mode; trace_xfs_readdir(dp); @@ -683,6 +684,7 @@ xfs_readdir( ASSERT(S_ISDIR(dp->i_d.di_mode)); XFS_STATS_INC(xs_dir_getdents); + lock_mode = xfs_ilock_data_map_shared(dp); if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_getdents(dp, ctx); else if ((rval = xfs_dir2_isblock(NULL, dp, &v))) @@ -691,5 +693,7 @@ xfs_readdir( rval = xfs_dir2_block_getdents(dp, ctx); else rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize); + xfs_iunlock(dp, lock_mode); + return rval; } From 4f317369d46956ccd76b5d28cf66b3f8b24f3480 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Dec 2013 12:30:12 -0800 Subject: [PATCH 44/51] xfs: take the ilock around xfs_bmapi_read in xfs_zero_remaining_bytes Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_bmap_util.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 374ba050942f..202a51f7c450 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1173,9 +1173,15 @@ xfs_zero_remaining_bytes( xfs_buf_unlock(bp); for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { + uint lock_mode; + offset_fsb = XFS_B_TO_FSBT(mp, offset); nimap = 1; + + lock_mode = xfs_ilock_data_map_shared(ip); error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0); + xfs_iunlock(ip, lock_mode); + if (error || nimap < 1) break; ASSERT(imap.br_blockcount >= 1); From f4df8adc8325127ff015ef9c2a8f005edaaedd07 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Dec 2013 12:30:13 -0800 Subject: [PATCH 45/51] xfs: use 
xfs_ilock_data_map_shared in xfs_qm_dqtobp We might not have read in the extent list at this point, so make sure we take the ilock exclusively if we have to do so. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_dquot.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 6b1e695caf0e..7aeb4c895b32 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -469,16 +469,17 @@ xfs_qm_dqtobp( struct xfs_mount *mp = dqp->q_mount; xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); struct xfs_trans *tp = (tpp ? *tpp : NULL); + uint lock_mode; dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; - xfs_ilock(quotip, XFS_ILOCK_SHARED); + lock_mode = xfs_ilock_data_map_shared(quotip); if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { /* * Return if this type of quotas is turned off while we * didn't have the quota inode lock. */ - xfs_iunlock(quotip, XFS_ILOCK_SHARED); + xfs_iunlock(quotip, lock_mode); return ESRCH; } @@ -488,7 +489,7 @@ xfs_qm_dqtobp( error = xfs_bmapi_read(quotip, dqp->q_fileoffset, XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0); - xfs_iunlock(quotip, XFS_ILOCK_SHARED); + xfs_iunlock(quotip, lock_mode); if (error) return error; From da51d32d4596a14ee33917b9eca056d4bf41706a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Dec 2013 12:30:14 -0800 Subject: [PATCH 46/51] xfs: use xfs_ilock_data_map_shared in xfs_qm_dqiterate We might not have read in the extent list at this point, so make sure we take the ilock exclusively if we have to do so. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_qm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index dd88f0e27bd8..348e4d2ed6e6 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1222,16 +1222,18 @@ xfs_qm_dqiterate( lblkno = 0; maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes); do { + uint lock_mode; + nmaps = XFS_DQITER_MAP_SIZE; /* * We aren't changing the inode itself. Just changing * some of its data. No new blocks are added here, and * the inode is never added to the transaction. */ - xfs_ilock(qip, XFS_ILOCK_SHARED); + lock_mode = xfs_ilock_data_map_shared(qip); error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno, map, &nmaps, 0); - xfs_iunlock(qip, XFS_ILOCK_SHARED); + xfs_iunlock(qip, lock_mode); if (error) break; From 683cb941598d1d81283c940c100e0ce40f494105 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Dec 2013 12:30:15 -0800 Subject: [PATCH 47/51] xfs: use xfs_ilock_attr_map_shared in xfs_attr_get We might not have read in the extent list at this point, so make sure we take the ilock exclusively if we have to do so. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_attr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index b86127072ac3..01b6a0102fbd 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -164,6 +164,7 @@ xfs_attr_get( { int error; struct xfs_name xname; + uint lock_mode; XFS_STATS_INC(xs_attr_get); @@ -174,9 +175,9 @@ xfs_attr_get( if (error) return error; - xfs_ilock(ip, XFS_ILOCK_SHARED); + lock_mode = xfs_ilock_attr_map_shared(ip); error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags); - xfs_iunlock(ip, XFS_ILOCK_SHARED); + xfs_iunlock(ip, lock_mode); return(error); } From 568d994e9f53657cb6b3e9c95a83c130d36f83c9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Dec 2013 12:30:16 -0800 Subject: [PATCH 48/51] xfs: use xfs_ilock_attr_map_shared in xfs_attr_list_int We might not have read in the extent list at this point, so make sure we take the ilock exclusively if we have to do so. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_attr_list.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 2d174b128153..01db96f60cf0 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -507,17 +507,17 @@ xfs_attr_list_int( { int error; xfs_inode_t *dp = context->dp; + uint lock_mode; XFS_STATS_INC(xs_attr_list); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return EIO; - xfs_ilock(dp, XFS_ILOCK_SHARED); - /* * Decide on what work routines to call based on the inode size. 
*/ + lock_mode = xfs_ilock_attr_map_shared(dp); if (!xfs_inode_hasattr(dp)) { error = 0; } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { @@ -527,9 +527,7 @@ xfs_attr_list_int( } else { error = xfs_attr_node_list(context); } - - xfs_iunlock(dp, XFS_ILOCK_SHARED); - + xfs_iunlock(dp, lock_mode); return error; } From eef334e5776c8ef547ada4cec17549929fe590b4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Dec 2013 12:30:17 -0800 Subject: [PATCH 49/51] xfs: assert that we hold the ilock for extent map access Make sure that xfs_bmapi_read has the ilock held in some way, and that xfs_bmapi_write, xfs_bmapi_delay, xfs_bunmapi and xfs_iread_extents are called with the ilock held exclusively. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Ben Myers --- fs/xfs/xfs_bmap.c | 4 ++++ fs/xfs/xfs_inode_fork.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 8401f11f378f..dbf27384b744 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4002,6 +4002,7 @@ xfs_bmapi_read( ASSERT(*nmap >= 1); ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| XFS_BMAPI_IGSTATE))); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && @@ -4196,6 +4197,7 @@ xfs_bmapi_delay( ASSERT(*nmap >= 1); ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); ASSERT(!(flags & ~XFS_BMAPI_ENTIRE)); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && @@ -4489,6 +4491,7 @@ xfs_bmapi_write( ASSERT(tp != NULL); ASSERT(len > 0); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && @@ -5040,6 +5043,7 @@ xfs_bunmapi( if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); + ASSERT(xfs_isilocked(ip, 
XFS_ILOCK_EXCL)); ASSERT(len > 0); ASSERT(nexts >= 0); diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c index cfee14a83cfe..e16985e1c2fd 100644 --- a/fs/xfs/xfs_inode_fork.c +++ b/fs/xfs/xfs_inode_fork.c @@ -431,6 +431,8 @@ xfs_iread_extents( xfs_ifork_t *ifp; xfs_extnum_t nextents; + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW, ip->i_mount); From 85dd0707f0cad26d60f2dc574d17a5ab948d10f7 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 1 Jan 2014 19:28:03 +0800 Subject: [PATCH 50/51] xfs: fix off-by-one error in xfs_attr3_rmt_verify With CRC checking enabled, trying to set an attribute value whose size is exactly equal to the maximum size, XATTR_SIZE_MAX, causes the v3 remote attr write verification procedure to fail, which yields a back trace like the one below: XFS (sda7): Internal error xfs_attr3_rmt_write_verify at line 191 of file fs/xfs/xfs_attr_remote.c Call Trace: [] dump_stack+0x45/0x56 [] xfs_error_report+0x3b/0x40 [xfs] [] ? _xfs_buf_ioapply+0x6d/0x390 [xfs] [] xfs_corruption_error+0x55/0x80 [xfs] [] xfs_attr3_rmt_write_verify+0x14b/0x1a0 [xfs] [] ? _xfs_buf_ioapply+0x6d/0x390 [xfs] [] ? xfs_bdstrat_cb+0x55/0xb0 [xfs] [] _xfs_buf_ioapply+0x6d/0x390 [xfs] [] ? vm_map_ram+0x31a/0x460 [] ? wake_up_state+0x20/0x20 [] ? xfs_bdstrat_cb+0x55/0xb0 [xfs] [] xfs_buf_iorequest+0x6b/0xc0 [xfs] [] xfs_bdstrat_cb+0x55/0xb0 [xfs] [] xfs_bwrite+0x46/0x80 [xfs] [] xfs_attr_rmtval_set+0x334/0x490 [xfs] [] xfs_attr_leaf_addname+0x24a/0x410 [xfs] [] xfs_attr_set_int+0x223/0x470 [xfs] [] xfs_attr_set+0x96/0xb0 [xfs] [] xfs_xattr_set+0x42/0x70 [xfs] [] generic_setxattr+0x62/0x80 [] __vfs_setxattr_noperm+0x63/0x1b0 [] ? evm_inode_setxattr+0xe/0x10 [] vfs_setxattr+0xb5/0xc0 [] setxattr+0x12e/0x1c0 [] ? final_putname+0x22/0x50 [] ? putname+0x2b/0x40 [] ? user_path_at_empty+0x5f/0x90 [] ? __sb_start_write+0x49/0xe0 [] ? 
vm_mmap_pgoff+0x99/0xc0 [] SyS_setxattr+0x8f/0xe0 [] system_call_fastpath+0x1a/0x1f Tests: setfattr -n user.longxattr -v `perl -e 'print "A"x65536'` testfile This patch fixes it by checking whether the remote EA size is greater than XATTR_SIZE_MAX, rather than greater than or equal to it, because an EA value size equal to the limit is valid as per the VFS setxattr interface. Signed-off-by: Jie Liu Reviewed-by: Mark Tinguely Signed-off-by: Ben Myers --- fs/xfs/xfs_attr_remote.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c index 739e0a52deda..5549d69ddb45 100644 --- a/fs/xfs/xfs_attr_remote.c +++ b/fs/xfs/xfs_attr_remote.c @@ -110,7 +110,7 @@ xfs_attr3_rmt_verify( if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) return false; if (be32_to_cpu(rmt->rm_offset) + - be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX) + be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) return false; if (rmt->rm_owner == 0) return false; From 6f96b3063cdd473c68664a190524ed966ac0cd92 Mon Sep 17 00:00:00 2001 From: Chuansheng Liu Date: Tue, 7 Jan 2014 16:53:34 +0800 Subject: [PATCH 51/51] xfs: Calling destroy_work_on_stack() to pair with INIT_WORK_ONSTACK() When CONFIG_DEBUG_OBJECTS_WORK is defined, destroy_work_on_stack() needs to be called to free the debug object and to pair with INIT_WORK_ONSTACK(). Signed-off-by: Liu, Chuansheng Reviewed-by: Ben Myers Signed-off-by: Ben Myers --- fs/xfs/xfs_bmap_util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 5887e41c0323..3f534e0862b1 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -287,6 +287,7 @@ xfs_bmapi_allocate( INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); queue_work(xfs_alloc_wq, &args->work); wait_for_completion(&done); + destroy_work_on_stack(&args->work); return args->result; }