Bug fixes for 6.7-rc2:

  * Fix deadlock arising due to intent items in AIL not being cleared when
    log recovery fails.
  * Fix stale data exposure bug when remapping COW fork extents to data fork.
  * Fix deadlock when data device flush fails.
  * Fix AGFL minimum size calculation.
  * Select DEBUG_FS instead of XFS_DEBUG when XFS_ONLINE_SCRUB_STATS is
    selected.
  * Fix corruption of log inode's extent count field when NREXT64 feature is
    enabled.
 
 Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQQjMC4mbgVeU7MxEIYH7y4RirJu9AUCZVNouAAKCRAH7y4RirJu
 9O0mAQDePPSRT8ZrR63dxFZ1AW55q4y9iqgBxWcnKEelmVULPwD/byzoAJ46jvcL
 qpBHUJ1rUIcd/fGqAEkwfG6hKzD99w8=
 =G+60
 -----END PGP SIGNATURE-----

Merge tag 'xfs-6.7-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Chandan Babu:

 - Fix deadlock arising due to intent items in AIL not being cleared
   when log recovery fails

 - Fix stale data exposure bug when remapping COW fork extents to data
   fork

 - Fix deadlock when data device flush fails

 - Fix AGFL minimum size calculation

 - Select DEBUG_FS instead of XFS_DEBUG when XFS_ONLINE_SCRUB_STATS is
   selected

 - Fix corruption of log inode's extent count field when NREXT64 feature
   is enabled

* tag 'xfs-6.7-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: recovery should not clear di_flushiter unconditionally
  xfs: inode recovery does not validate the recovered inode
  xfs: fix again select in kconfig XFS_ONLINE_SCRUB_STATS
  xfs: fix internal error from AGFL exhaustion
  xfs: up(ic_sema) if flushing data device fails
  xfs: only remap the written blocks in xfs_reflink_end_cow_extent
  XFS: Update MAINTAINERS to catch all XFS documentation
  xfs: abort intent items when recovery intents fail
  xfs: factor out xfs_defer_pending_abort
Merged by Linus Torvalds, 2023-11-18 11:28:28 -08:00
commit b8f1fa2419
10 changed files with 97 additions and 50 deletions

diff --git a/MAINTAINERS b/MAINTAINERS

@@ -23882,8 +23882,7 @@ T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
 P: Documentation/filesystems/xfs-maintainer-entry-profile.rst
 F: Documentation/ABI/testing/sysfs-fs-xfs
 F: Documentation/admin-guide/xfs.rst
-F: Documentation/filesystems/xfs-delayed-logging-design.rst
-F: Documentation/filesystems/xfs-self-describing-metadata.rst
+F: Documentation/filesystems/xfs-*
 F: fs/xfs/
 F: include/uapi/linux/dqblk_xfs.h
 F: include/uapi/linux/fsmap.h

diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig

@@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS
 	bool "XFS online metadata check usage data collection"
 	default y
 	depends on XFS_ONLINE_SCRUB
-	select XFS_DEBUG
+	select DEBUG_FS
 	help
 	  If you say Y here, the kernel will gather usage data about
 	  the online metadata check subsystem. This includes the number

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c

@@ -2275,16 +2275,37 @@ xfs_alloc_min_freelist(
 
 	ASSERT(mp->m_alloc_maxlevels > 0);
 
+	/*
+	 * For a btree shorter than the maximum height, the worst case is that
+	 * every level gets split and a new level is added, then while inserting
+	 * another entry to refill the AGFL, every level under the old root gets
+	 * split again. This is:
+	 *
+	 *   (full height split reservation) + (AGFL refill split height)
+	 * = (current height + 1) + (current height - 1)
+	 * = (new height) + (new height - 2)
+	 * = 2 * new height - 2
+	 *
+	 * For a btree of maximum height, the worst case is that every level
+	 * under the root gets split, then while inserting another entry to
+	 * refill the AGFL, every level under the root gets split again. This is
+	 * also:
+	 *
+	 *   2 * (current height - 1)
+	 * = 2 * (new height - 1)
+	 * = 2 * new height - 2
+	 */
+
 	/* space needed by-bno freespace btree */
 	min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
-				       mp->m_alloc_maxlevels);
+				       mp->m_alloc_maxlevels) * 2 - 2;
 	/* space needed by-size freespace btree */
 	min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
-				       mp->m_alloc_maxlevels);
+				       mp->m_alloc_maxlevels) * 2 - 2;
 	/* space needed reverse mapping used space btree */
 	if (xfs_has_rmapbt(mp))
 		min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
-						mp->m_rmap_maxlevels);
+						mp->m_rmap_maxlevels) * 2 - 2;
 
 	return min_free;
 }
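
The reservation arithmetic in the new comment is easy to sanity-check. The following stand-alone sketch (user-space C with made-up sample heights, not kernel code) evaluates the same 2 * new_height - 2 worst case that the changed min_t() expressions now encode:

#include <stdio.h>

/* Worst-case free-list blocks needed by one btree, per the comment
 * above: a full-height split plus an AGFL-refill split works out to
 * 2 * new_height - 2, where new_height is capped at max_levels. */
static unsigned int worst_case_split_blocks(unsigned int cur_levels,
					    unsigned int max_levels)
{
	unsigned int new_height = cur_levels + 1;

	if (new_height > max_levels)
		new_height = max_levels;
	return 2 * new_height - 2;
}

int main(void)
{
	/* Sample values only: 3-level bnobt and cntbt, 5-level cap. */
	unsigned int bno = worst_case_split_blocks(3, 5);
	unsigned int cnt = worst_case_split_blocks(3, 5);

	/* (3 + 1) * 2 - 2 = 6 blocks per tree, 12 in total. */
	printf("min_free = %u\n", bno + cnt);
	return 0;
}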

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c

@@ -245,26 +245,33 @@ xfs_defer_create_intents(
 	return ret;
 }
 
-/* Abort all the intents that were committed. */
 STATIC void
+xfs_defer_pending_abort(
+	struct xfs_mount		*mp,
+	struct list_head		*dop_list)
+{
+	struct xfs_defer_pending	*dfp;
+	const struct xfs_defer_op_type	*ops;
+
+	/* Abort intent items that don't have a done item. */
+	list_for_each_entry(dfp, dop_list, dfp_list) {
+		ops = defer_op_types[dfp->dfp_type];
+		trace_xfs_defer_pending_abort(mp, dfp);
+		if (dfp->dfp_intent && !dfp->dfp_done) {
+			ops->abort_intent(dfp->dfp_intent);
+			dfp->dfp_intent = NULL;
+		}
+	}
+}
+
+/* Abort all the intents that were committed. */
+STATIC void
 xfs_defer_trans_abort(
 	struct xfs_trans		*tp,
 	struct list_head		*dop_pending)
 {
-	struct xfs_defer_pending	*dfp;
-	const struct xfs_defer_op_type	*ops;
-
 	trace_xfs_defer_trans_abort(tp, _RET_IP_);
-
-	/* Abort intent items that don't have a done item. */
-	list_for_each_entry(dfp, dop_pending, dfp_list) {
-		ops = defer_op_types[dfp->dfp_type];
-		trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
-		if (dfp->dfp_intent && !dfp->dfp_done) {
-			ops->abort_intent(dfp->dfp_intent);
-			dfp->dfp_intent = NULL;
-		}
-	}
+	xfs_defer_pending_abort(tp->t_mountp, dop_pending);
 }
 
 /*
@@ -756,12 +763,13 @@ xfs_defer_ops_capture(
 
 /* Release all resources that we used to capture deferred ops. */
 void
-xfs_defer_ops_capture_free(
+xfs_defer_ops_capture_abort(
 	struct xfs_mount		*mp,
 	struct xfs_defer_capture	*dfc)
 {
	unsigned short			i;
 
+	xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
 	xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
 
 	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
@@ -802,7 +810,7 @@ xfs_defer_ops_capture_and_commit(
 	/* Commit the transaction and add the capture structure to the list. */
 	error = xfs_trans_commit(tp);
 	if (error) {
-		xfs_defer_ops_capture_free(mp, dfc);
+		xfs_defer_ops_capture_abort(mp, dfc);
 		return error;
 	}
 
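
The factored-out helper is a plain list walk. As a rough user-space analogue (every type and name below is hypothetical, not the kernel's), the pattern is: visit each pending item and abort only those that logged an intent but never logged a done item:

#include <stdio.h>
#include <stddef.h>

/* Illustrative stand-in for the xfs_defer_pending_abort() walk. */
struct pending_item {
	const char		*name;
	int			has_intent;
	int			has_done;
	struct pending_item	*next;
};

static void abort_pending(struct pending_item *list)
{
	struct pending_item *p;

	for (p = list; p; p = p->next) {
		if (p->has_intent && !p->has_done) {
			printf("aborting %s\n", p->name);
			p->has_intent = 0;	/* mirrors dfp_intent = NULL */
		}
	}
}

int main(void)
{
	struct pending_item b = { "BUI", 1, 1, NULL };	/* has done item: skipped */
	struct pending_item a = { "EFI", 1, 0, &b };	/* no done item: aborted */

	abort_pending(&a);
	return 0;
}

Factoring this walk out lets log recovery abort captured intents directly (see the xfs_log_recover.c hunk below) instead of leaking them into the AIL when recovery fails.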

diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h

@@ -121,7 +121,7 @@ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
 		struct list_head *capture_list);
 void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
 		struct xfs_defer_resources *dres);
-void xfs_defer_ops_capture_free(struct xfs_mount *mp,
+void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
 		struct xfs_defer_capture *d);
 void xfs_defer_resources_rele(struct xfs_defer_resources *dres);
 

diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c

@@ -510,6 +510,9 @@ xfs_dinode_verify(
 	if (mode && nextents + naextents > nblocks)
 		return __this_address;
 
+	if (nextents + naextents == 0 && nblocks != 0)
+		return __this_address;
+
 	if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
 		return __this_address;
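
Stated on its own, the added check enforces a simple invariant: an inode claiming zero data and attr extents cannot own any blocks. A minimal illustrative restatement (plain C, not the kernel verifier; field names only mirror the on-disk counters):

#include <stdbool.h>
#include <stdint.h>

/* Reject an inode that reports allocated blocks but no extents. */
static bool extent_counts_consistent(uint64_t nextents, uint64_t naextents,
				     uint64_t nblocks)
{
	if (nextents + naextents == 0 && nblocks != 0)
		return false;	/* corrupt: blocks without any extents */
	return true;
}

int main(void)
{
	/* e.g. a recovered inode with 0 extents but 5 blocks is rejected */
	return extent_counts_consistent(0, 0, 5) ? 1 : 0;
}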

diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c

@@ -286,6 +286,7 @@ xlog_recover_inode_commit_pass2(
 	struct xfs_log_dinode		*ldip;
 	uint				isize;
 	int				need_free = 0;
+	xfs_failaddr_t			fa;
 
 	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
 		in_f = item->ri_buf[0].i_addr;
@@ -369,24 +370,26 @@ xlog_recover_inode_commit_pass2(
 	 * superblock flag to determine whether we need to look at di_flushiter
 	 * to skip replay when the on disk inode is newer than the log one
 	 */
-	if (!xfs_has_v3inodes(mp) &&
-	    ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
-		/*
-		 * Deal with the wrap case, DI_MAX_FLUSH is less
-		 * than smaller numbers
-		 */
-		if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
-		    ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
-			/* do nothing */
-		} else {
-			trace_xfs_log_recover_inode_skip(log, in_f);
-			error = 0;
-			goto out_release;
+	if (!xfs_has_v3inodes(mp)) {
+		if (ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+			/*
+			 * Deal with the wrap case, DI_MAX_FLUSH is less
+			 * than smaller numbers
+			 */
+			if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
+			    ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
+				/* do nothing */
+			} else {
+				trace_xfs_log_recover_inode_skip(log, in_f);
+				error = 0;
+				goto out_release;
+			}
 		}
+
+		/* Take the opportunity to reset the flush iteration count */
+		ldip->di_flushiter = 0;
 	}
 
-	/* Take the opportunity to reset the flush iteration count */
-	ldip->di_flushiter = 0;
-
 	if (unlikely(S_ISREG(ldip->di_mode))) {
 		if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
@@ -528,8 +531,19 @@ out_owner_change:
 	    (dip->di_mode != 0))
 		error = xfs_recover_inode_owner_change(mp, dip, in_f,
 						       buffer_list);
-	/* re-generate the checksum. */
+	/* re-generate the checksum and validate the recovered inode. */
 	xfs_dinode_calc_crc(log->l_mp, dip);
+	fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip);
+	if (fa) {
+		XFS_CORRUPTION_ERROR(
+				"Bad dinode after recovery",
+				XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip));
+		xfs_alert(mp,
+			"Metadata corruption detected at %pS, inode 0x%llx",
+			fa, in_f->ilf_ino);
+		error = -EFSCORRUPTED;
+		goto out_release;
+	}
 
 	ASSERT(bp->b_mount == mp);
 	bp->b_flags |= _XBF_LOGRECOVERY;
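
The wrap handling is easier to see with concrete values. Below is a stand-alone sketch of the skip decision, assuming a 16-bit counter whose maximum is 0xffff as in the kernel's DI_MAX_FLUSH; this is an illustration, not the recovery code itself:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DI_MAX_FLUSH 0xffff	/* 16-bit counter maximum */

/* Should replay be skipped because the on-disk (v2) inode is newer?
 * Skip when the log copy is older, except in the wrap case where the
 * on-disk counter sits at DI_MAX_FLUSH and the log counter has wrapped
 * back to a small value, in which case the log copy is really newer. */
static bool skip_replay(uint16_t log_flushiter, uint16_t disk_flushiter)
{
	if (log_flushiter >= disk_flushiter)
		return false;			/* log copy is newer: replay */
	if (disk_flushiter == DI_MAX_FLUSH &&
	    log_flushiter < (DI_MAX_FLUSH >> 1))
		return false;			/* counter wrapped: replay */
	return true;				/* on-disk copy is newer: skip */
}

int main(void)
{
	printf("%d\n", skip_replay(5, 7));		/* 1: disk is newer */
	printf("%d\n", skip_replay(3, DI_MAX_FLUSH));	/* 0: wrap case */
	return 0;
}

The restructured kernel code additionally makes the di_flushiter reset conditional on !xfs_has_v3inodes(), so replaying a v3 (NREXT64-capable) inode no longer zeroes the union member that holds the large extent count.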

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c

@@ -1893,9 +1893,7 @@ xlog_write_iclog(
 		 * the buffer manually, the code needs to be kept in sync
 		 * with the I/O completion path.
 		 */
-		xlog_state_done_syncing(iclog);
-		up(&iclog->ic_sema);
-		return;
+		goto sync;
 	}
 
 	/*
@@ -1925,20 +1923,17 @@
 		 * avoid shutdown re-entering this path and erroring out again.
 		 */
 		if (log->l_targ != log->l_mp->m_ddev_targp &&
-		    blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
-			xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
-			return;
-		}
+		    blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev))
+			goto shutdown;
 	}
 
 	if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
 		iclog->ic_bio.bi_opf |= REQ_FUA;
 
 	iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
 
-	if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
-		xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
-		return;
-	}
+	if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count))
+		goto shutdown;
+
 	if (is_vmalloc_addr(iclog->ic_data))
 		flush_kernel_vmap_range(iclog->ic_data, count);
@@ -1959,6 +1954,12 @@
 	}
 
 	submit_bio(&iclog->ic_bio);
 	return;
+
+shutdown:
+	xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+sync:
+	xlog_state_done_syncing(iclog);
+	up(&iclog->ic_sema);
 }
 
 /*
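
The fix consolidates the failure paths so that every early exit still completes the iclog and releases ic_sema, which is exactly what the old flush-failure path forgot to do (causing the deadlock). A generic sketch of the pattern (illustrative names only, no kernel APIs):

#include <stdio.h>

static int flush_device(void) { return -1; }	/* pretend the flush fails */

/* Every failure funnels through "shutdown", and both failures and the
 * early "no I/O needed" case fall through to the common completion in
 * "sync", so the semaphore is always released. */
static void write_log_buffer(int empty)
{
	if (empty)
		goto sync;		/* nothing to submit, still complete */

	if (flush_device())
		goto shutdown;		/* error: shut down, then complete */

	printf("submitted\n");
	return;				/* completion runs in the I/O callback */

shutdown:
	printf("shutting down log\n");
sync:
	printf("done syncing, releasing semaphore\n");
}

int main(void)
{
	write_log_buffer(0);
	return 0;
}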

diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

@@ -2511,7 +2511,7 @@
 
 	list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
 		list_del_init(&dfc->dfc_list);
-		xfs_defer_ops_capture_free(mp, dfc);
+		xfs_defer_ops_capture_abort(mp, dfc);
 	}
 }

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c

@@ -784,6 +784,7 @@ xfs_reflink_end_cow_extent(
 		}
 	}
 	del = got;
+	xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb);
 
 	/* Grab the corresponding mapping in the data fork. */
 	nmaps = 1;
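
Here xfs_trim_extent() clamps the COW-fork mapping to the range actually being remapped, so blocks of the extent that were never written stay out of the data fork and cannot expose stale data. A rough user-space restatement of the clamping (illustrative, not the kernel helper):

#include <stdint.h>
#include <stdio.h>

/* A mapping of "count" blocks starting at logical block "start". */
struct extent {
	uint64_t	start;
	uint64_t	count;
};

/* Clamp [start, start + count) to the half-open window [lo, hi). */
static void trim_extent(struct extent *e, uint64_t lo, uint64_t hi)
{
	uint64_t end = e->start + e->count;

	if (e->start < lo)
		e->start = lo;
	if (end > hi)
		end = hi;
	e->count = end > e->start ? end - e->start : 0;
}

int main(void)
{
	struct extent del = { .start = 8, .count = 16 };	/* blocks 8..23 */

	trim_extent(&del, 10, 20);	/* remap window: blocks 10..19 */
	printf("start=%llu count=%llu\n",
	       (unsigned long long)del.start,
	       (unsigned long long)del.count);	/* start=10 count=10 */
	return 0;
}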