xfs: use deferred frees for btree block freeing

Btrees that aren't freespace management trees use the normal extent
allocation and freeing routines for their blocks. Hence when a btree
block is freed, a direct call to xfs_free_extent() is made and the
extent is immediately freed. This puts the entire free space
management btrees under this path, so we are stacking btrees on
btrees in the call stack. The inobt, finobt and refcount btrees
all do this.

However, the bmap btree does not do this - it calls
xfs_free_extent_later() to defer the extent free operation via an
XEFI and hence it gets processed in deferred operation processing
during the commit of the primary transaction (i.e. via intent
chaining).

We need to change xfs_free_extent() to behave in a non-blocking
manner so that we can avoid deadlocks with busy extents near ENOSPC
in transactions that free multiple extents. Inserting or removing a
record from a btree can cause a multi-level tree merge operation and
that will free multiple blocks from the btree in a single
transaction. i.e. we can call xfs_free_extent() multiple times, and
hence the btree manipulation transaction is vulnerable to this busy
extent deadlock vector.

To fix this, convert all the remaining callers of xfs_free_extent()
to use xfs_free_extent_later() to queue XEFIs and hence defer
processing of the extent frees to a context that can be safely
restarted if a deadlock condition is detected.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Chandan Babu R <chandan.babu@oracle.com>
This commit is contained in:
Dave Chinner 2023-06-28 11:04:32 -07:00 committed by Darrick J. Wong
parent 939bd50dfb
commit b742d7b4f0
11 changed files with 33 additions and 26 deletions

View File

@ -985,7 +985,7 @@ xfs_ag_shrink_space(
goto resv_err;
err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
true);
XFS_AG_RESV_NONE, true);
if (err2)
goto resv_err;

View File

@ -2449,6 +2449,7 @@ xfs_defer_agfl_block(
xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
xefi->xefi_blockcount = 1;
xefi->xefi_owner = oinfo->oi_owner;
xefi->xefi_agresv = XFS_AG_RESV_AGFL;
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock)))
return -EFSCORRUPTED;
@ -2470,6 +2471,7 @@ __xfs_free_extent_later(
xfs_fsblock_t bno,
xfs_filblks_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
bool skip_discard)
{
struct xfs_extent_free_item *xefi;
@ -2490,6 +2492,7 @@ __xfs_free_extent_later(
ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
ASSERT(xfs_extfree_item_cache != NULL);
ASSERT(type != XFS_AG_RESV_AGFL);
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
return -EFSCORRUPTED;
@ -2498,6 +2501,7 @@ __xfs_free_extent_later(
GFP_KERNEL | __GFP_NOFAIL);
xefi->xefi_startblock = bno;
xefi->xefi_blockcount = (xfs_extlen_t)len;
xefi->xefi_agresv = type;
if (skip_discard)
xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
if (oinfo) {

View File

@ -232,7 +232,7 @@ xfs_buf_to_agfl_bno(
int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
bool skip_discard);
enum xfs_ag_resv_type type, bool skip_discard);
/*
* List of extents to be free "later".
@ -245,6 +245,7 @@ struct xfs_extent_free_item {
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
struct xfs_perag *xefi_pag;
unsigned int xefi_flags;
enum xfs_ag_resv_type xefi_agresv;
};
void xfs_extent_free_get_group(struct xfs_mount *mp,
@ -259,9 +260,10 @@ xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
const struct xfs_owner_info *oinfo)
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type)
{
return __xfs_free_extent_later(tp, bno, len, oinfo, false);
return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);
}

View File

@ -574,7 +574,8 @@ xfs_bmap_btree_to_extents(
return error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo);
error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
XFS_AG_RESV_NONE);
if (error)
return error;
@ -5236,8 +5237,9 @@ xfs_bmap_del_extent_real(
} else {
error = __xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
(bflags & XFS_BMAPI_NODISCARD) ||
del->br_state == XFS_EXT_UNWRITTEN);
XFS_AG_RESV_NONE,
((bflags & XFS_BMAPI_NODISCARD) ||
del->br_state == XFS_EXT_UNWRITTEN));
if (error)
goto done;
}

View File

@ -271,7 +271,8 @@ xfs_bmbt_free_block(
int error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo);
error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
XFS_AG_RESV_NONE);
if (error)
return error;

View File

@ -1853,8 +1853,8 @@ xfs_difree_inode_chunk(
/* not sparse, calculate extent info directly */
return xfs_free_extent_later(tp,
XFS_AGB_TO_FSB(mp, agno, sagbno),
M_IGEO(mp)->ialloc_blks,
&XFS_RMAP_OINFO_INODES);
M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
XFS_AG_RESV_NONE);
}
/* holemask is only 16-bits (fits in an unsigned long) */
@ -1899,8 +1899,8 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
error = xfs_free_extent_later(tp,
XFS_AGB_TO_FSB(mp, agno, agbno),
contigblk, &XFS_RMAP_OINFO_INODES);
XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
&XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
if (error)
return error;

View File

@ -160,8 +160,7 @@ __xfs_inobt_free_block(
xfs_inobt_mod_blockcount(cur, -1);
fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
&XFS_RMAP_OINFO_INOBT, resv);
}

View File

@ -1152,7 +1152,8 @@ xfs_refcount_adjust_extents(
cur->bc_ag.pag->pag_agno,
tmp.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
tmp.rc_blockcount, NULL);
tmp.rc_blockcount, NULL,
XFS_AG_RESV_NONE);
if (error)
goto out_error;
}
@ -1213,7 +1214,8 @@ xfs_refcount_adjust_extents(
cur->bc_ag.pag->pag_agno,
ext.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
ext.rc_blockcount, NULL);
ext.rc_blockcount, NULL,
XFS_AG_RESV_NONE);
if (error)
goto out_error;
}
@ -1981,7 +1983,8 @@ xfs_refcount_recover_cow_leftovers(
/* Free the block. */
error = xfs_free_extent_later(tp, fsb,
rr->rr_rrec.rc_blockcount, NULL);
rr->rr_rrec.rc_blockcount, NULL,
XFS_AG_RESV_NONE);
if (error)
goto out_trans;

View File

@ -106,19 +106,13 @@ xfs_refcountbt_free_block(
struct xfs_buf *agbp = cur->bc_ag.agbp;
struct xfs_agf *agf = agbp->b_addr;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
int error;
trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
be32_add_cpu(&agf->agf_refcount_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag,
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
if (error)
return error;
return error;
}
STATIC int

View File

@ -365,7 +365,7 @@ xfs_trans_free_extent(
agbno, xefi->xefi_blockcount);
error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE,
xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv,
xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
/*
@ -644,6 +644,7 @@ xfs_efi_item_recover(
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
struct xfs_extent_free_item fake = {
.xefi_owner = XFS_RMAP_OWN_UNKNOWN,
.xefi_agresv = XFS_AG_RESV_NONE,
};
struct xfs_extent *extp;

View File

@ -617,7 +617,8 @@ xfs_reflink_cancel_cow_blocks(
del.br_blockcount);
error = xfs_free_extent_later(*tpp, del.br_startblock,
del.br_blockcount, NULL);
del.br_blockcount, NULL,
XFS_AG_RESV_NONE);
if (error)
break;