xfs: bug fixes for 6.4-rc2

o fixes for inode garbage collection shutdown racing with work queue
   updates
 o ensure inodegc workers run on the CPU they are supposed to
 o disable counter scrubbing until we can exclusively freeze the
   filesystem from the kernel
 o Regression fixes for new allocation related bugs
 o a couple of minor cleanups
 -----BEGIN PGP SIGNATURE-----
 
 iQJIBAABCgAyFiEEmJOoJ8GffZYWSjj/regpR/R1+h0FAmRcSIsUHGRhdmlkQGZy
 b21vcmJpdC5jb20ACgkQregpR/R1+h2Y8xAAxtsTdOx71XtDuNyfBOiqzZgTCq6b
 6LsckJIDQa1AXjUNq9G3zWcUcWBcRWcw+CWbkqjqQ9W47K/ijLuoKnjRsQ+5B4DU
 TBUctVq+/Zk2lBlb6HKuKdzqDGnIFWGVKVd7u8KlowqnXuzUeQ0vFkT7ZHTepUKG
 P+midgGNVT4+tykq7oH0H8WxoTyNPZhKiAUcZjneBgA60IAoQWHA2iUt+SKpbrkL
 1HyK+/edVMTXiDXtyHfXmDaH9Pgy6NCpw3TNkPDhuL1UDpLhg/zgT39rFZGBsAUt
 gaDM3wN5jBrot/mvJE3rH9bdZhkcf+NQKPx/1DDg3DL8plS/1/LUC4cImdolBJ3w
 RNmgJv1lK+AlE4MUJ/bUDlEpHUmwAjnnsxBXwEvnYNfj+9V6/mDB+HqKiY7/XxVK
 vF77s6z+CWvefdnZavJ4/72pVVJNkcDYCYmvh/donRP6vtnwZyzocFUeBeNMInV1
 /s3WMrF9hwmJqAClKG7p1fnszWp658yFIuw/TXVs+NrjTtQgXwMpl2cEYYvUZEJN
 Trq2p0xH/JSwcnOPSPJO6WHb8UPoqrM6lgGFaJVWJx1AWt1i1CFLf5eA5X+XisDV
 AJKgpqlnDg02bBMQ0tMFGZUaNx/1S1mwtxcZsyEFTutpUNxqJKDaMohpxxrWb0WC
 ppSqDvyJN4wtlFI=
 =qok2
 -----END PGP SIGNATURE-----

Merge tag 'xfs-6.4-rc1-fixes' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs bug fixes from Dave Chinner:
 "Largely minor bug fixes and cleanups, th emost important of which are
  probably the fixes for regressions in the extent allocation code:

   - fixes for inode garbage collection shutdown racing with work queue
     updates

   - ensure inodegc workers run on the CPU they are supposed to

   - disable counter scrubbing until we can exclusively freeze the
     filesystem from the kernel

   - regression fixes for new allocation related bugs

   - a couple of minor cleanups"

* tag 'xfs-6.4-rc1-fixes' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: fix xfs_inodegc_stop racing with mod_delayed_work
  xfs: disable reaping in fscounters scrub
  xfs: check that per-cpu inodegc workers actually run on that cpu
  xfs: explicitly specify cpu when forcing inodegc delayed work to run immediately
  xfs: fix negative array access in xfs_getbmap
  xfs: don't allocate into the data fork for an unshare request
  xfs: flush dirty data and drain directios before scrubbing cow fork
  xfs: set bnobt/cntbt numrecs correctly when formatting new AGs
  xfs: don't unconditionally null args->pag in xfs_bmap_btalloc_at_eof
This commit is contained in:
Linus Torvalds 2023-05-11 16:51:11 -05:00
commit 849a4f0973
14 changed files with 65 additions and 63 deletions

View file

@ -495,10 +495,12 @@ xfs_freesp_init_recs(
ASSERT(start >= mp->m_ag_prealloc_blocks);
if (start != mp->m_ag_prealloc_blocks) {
/*
* Modify first record to pad stripe align of log
* Modify first record to pad stripe align of log and
* bump the record count.
*/
arec->ar_blockcount = cpu_to_be32(start -
mp->m_ag_prealloc_blocks);
be16_add_cpu(&block->bb_numrecs, 1);
nrec = arec + 1;
/*
@ -509,7 +511,6 @@ xfs_freesp_init_recs(
be32_to_cpu(arec->ar_startblock) +
be32_to_cpu(arec->ar_blockcount));
arec = nrec;
be16_add_cpu(&block->bb_numrecs, 1);
}
/*
* Change record start to after the internal log
@ -518,15 +519,13 @@ xfs_freesp_init_recs(
}
/*
* Calculate the record block count and check for the case where
* the log might have consumed all available space in the AG. If
* so, reset the record count to 0 to avoid exposure of an invalid
* record start block.
* Calculate the block count of this record; if it is nonzero,
* increment the record count.
*/
arec->ar_blockcount = cpu_to_be32(id->agsize -
be32_to_cpu(arec->ar_startblock));
if (!arec->ar_blockcount)
block->bb_numrecs = 0;
if (arec->ar_blockcount)
be16_add_cpu(&block->bb_numrecs, 1);
}
/*
@ -538,7 +537,7 @@ xfs_bnoroot_init(
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno);
xfs_freesp_init_recs(mp, bp, id);
}
@ -548,7 +547,7 @@ xfs_cntroot_init(
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno);
xfs_freesp_init_recs(mp, bp, id);
}

View file

@ -3494,8 +3494,10 @@ xfs_bmap_btalloc_at_eof(
if (!caller_pag)
args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
if (!caller_pag)
if (!caller_pag) {
xfs_perag_put(args->pag);
args->pag = NULL;
}
if (error)
return error;
@ -3505,7 +3507,6 @@ xfs_bmap_btalloc_at_eof(
* Exact allocation failed. Reset to try an aligned allocation
* according to the original allocation specification.
*/
args->pag = NULL;
args->alignment = stripe_align;
args->minlen = nextminlen;
args->minalignslop = 0;

View file

@ -42,12 +42,12 @@ xchk_setup_inode_bmap(
xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);
/*
* We don't want any ephemeral data fork updates sitting around
* We don't want any ephemeral data/cow fork updates sitting around
* while we inspect block mappings, so wait for directio to finish
* and flush dirty data if we have delalloc reservations.
*/
if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
sc->ilock_flags |= XFS_MMAPLOCK_EXCL;

View file

@ -1164,32 +1164,6 @@ xchk_metadata_inode_forks(
return 0;
}
/* Pause background reaping of resources. */
void
xchk_stop_reaping(
struct xfs_scrub *sc)
{
sc->flags |= XCHK_REAPING_DISABLED;
xfs_blockgc_stop(sc->mp);
xfs_inodegc_stop(sc->mp);
}
/* Restart background reaping of resources. */
void
xchk_start_reaping(
struct xfs_scrub *sc)
{
/*
* Readonly filesystems do not perform inactivation or speculative
* preallocation, so there's no need to restart the workers.
*/
if (!xfs_is_readonly(sc->mp)) {
xfs_inodegc_start(sc->mp);
xfs_blockgc_start(sc->mp);
}
sc->flags &= ~XCHK_REAPING_DISABLED;
}
/*
* Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
* operation. Callers must not hold any locks that intersect with the CPU

View file

@ -156,8 +156,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
}
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
void xchk_stop_reaping(struct xfs_scrub *sc);
void xchk_start_reaping(struct xfs_scrub *sc);
/*
* Setting up a hook to wait for intents to drain is costly -- we have to take

View file

@ -150,13 +150,6 @@ xchk_setup_fscounters(
if (error)
return error;
/*
* Pause background reclaim while we're scrubbing to reduce the
* likelihood of background perturbations to the counters throwing off
* our calculations.
*/
xchk_stop_reaping(sc);
return xchk_trans_alloc(sc, 0);
}
@ -453,6 +446,12 @@ xchk_fscounters(
if (frextents > mp->m_sb.sb_rextents)
xchk_set_corrupt(sc);
/*
* XXX: We can't quiesce percpu counter updates, so exit early.
* This can be re-enabled when we gain exclusive freeze functionality.
*/
return 0;
/*
* If ifree exceeds icount by more than the minimum variance then
* something's probably wrong with the counters.

View file

@ -186,8 +186,6 @@ xchk_teardown(
}
if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
mnt_drop_write_file(sc->file);
if (sc->flags & XCHK_REAPING_DISABLED)
xchk_start_reaping(sc);
if (sc->buf) {
if (sc->buf_cleanup)
sc->buf_cleanup(sc->buf);

View file

@ -106,7 +106,6 @@ struct xfs_scrub {
/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */
#define XCHK_REAPING_DISABLED (1 << 1) /* background block reaping paused */
#define XCHK_FSGATES_DRAIN (1 << 2) /* defer ops draining enabled */
#define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to drain defer ops */
#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */

View file

@ -98,7 +98,6 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
#define XFS_SCRUB_STATE_STRINGS \
{ XCHK_TRY_HARDER, "try_harder" }, \
{ XCHK_REAPING_DISABLED, "reaping_disabled" }, \
{ XCHK_FSGATES_DRAIN, "fsgates_drain" }, \
{ XCHK_NEED_DRAIN, "need_drain" }, \
{ XREP_ALREADY_FIXED, "already_fixed" }

View file

@ -558,7 +558,9 @@ xfs_getbmap(
if (!xfs_iext_next_extent(ifp, &icur, &got)) {
xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;
if (bmv->bmv_entries > 0)
out[bmv->bmv_entries - 1].bmv_oflags |=
BMV_OF_LAST;
if (whichfork != XFS_ATTR_FORK && bno < end &&
!xfs_getbmap_full(bmv)) {

View file

@ -435,18 +435,23 @@ xfs_iget_check_free_state(
}
/* Make all pending inactivation work start immediately. */
static void
static bool
xfs_inodegc_queue_all(
struct xfs_mount *mp)
{
struct xfs_inodegc *gc;
int cpu;
bool ret = false;
for_each_online_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu);
if (!llist_empty(&gc->list))
if (!llist_empty(&gc->list)) {
mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
ret = true;
}
}
return ret;
}
/*
@ -1856,6 +1861,8 @@ xfs_inodegc_worker(
struct xfs_inode *ip, *n;
unsigned int nofs_flag;
ASSERT(gc->cpu == smp_processor_id());
WRITE_ONCE(gc->items, 0);
if (!node)
@ -1909,24 +1916,41 @@ xfs_inodegc_flush(
/*
* Flush all the pending work and then disable the inode inactivation background
* workers and wait for them to stop.
* workers and wait for them to stop. Caller must hold sb->s_umount to
* coordinate changes in the inodegc_enabled state.
*/
void
xfs_inodegc_stop(
struct xfs_mount *mp)
{
bool rerun;
if (!xfs_clear_inodegc_enabled(mp))
return;
/*
* Drain all pending inodegc work, including inodes that could be
* queued by racing xfs_inodegc_queue or xfs_inodegc_shrinker_scan
* threads that sample the inodegc state just prior to us clearing it.
* The inodegc flag state prevents new threads from queuing more
* inodes, so we queue pending work items and flush the workqueue until
* all inodegc lists are empty. IOWs, we cannot use drain_workqueue
* here because it does not allow other unserialized mechanisms to
* reschedule inodegc work while this draining is in progress.
*/
xfs_inodegc_queue_all(mp);
drain_workqueue(mp->m_inodegc_wq);
do {
flush_workqueue(mp->m_inodegc_wq);
rerun = xfs_inodegc_queue_all(mp);
} while (rerun);
trace_xfs_inodegc_stop(mp, __return_address);
}
/*
* Enable the inode inactivation background workers and schedule deferred inode
* inactivation work if there is any.
* inactivation work if there is any. Caller must hold sb->s_umount to
* coordinate changes in the inodegc_enabled state.
*/
void
xfs_inodegc_start(
@ -2069,7 +2093,8 @@ xfs_inodegc_queue(
queue_delay = 0;
trace_xfs_inodegc_queue(mp, __return_address);
mod_delayed_work(mp->m_inodegc_wq, &gc->work, queue_delay);
mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
queue_delay);
put_cpu_ptr(gc);
if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
@ -2113,7 +2138,8 @@ xfs_inodegc_cpu_dead(
if (xfs_is_inodegc_enabled(mp)) {
trace_xfs_inodegc_queue(mp, __return_address);
mod_delayed_work(mp->m_inodegc_wq, &gc->work, 0);
mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
0);
}
put_cpu_ptr(gc);
}

View file

@ -1006,8 +1006,9 @@ xfs_buffered_write_iomap_begin(
if (eof)
imap.br_startoff = end_fsb; /* fake hole until the end */
/* We never need to allocate blocks for zeroing a hole. */
if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
/* We never need to allocate blocks for zeroing or unsharing a hole. */
if ((flags & (IOMAP_UNSHARE | IOMAP_ZERO)) &&
imap.br_startoff > offset_fsb) {
xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
goto out_unlock;
}

View file

@ -66,6 +66,9 @@ struct xfs_inodegc {
/* approximate count of inodes in the list */
unsigned int items;
unsigned int shrinker_hits;
#if defined(DEBUG) || defined(XFS_WARN)
unsigned int cpu;
#endif
};
/*

View file

@ -1095,6 +1095,9 @@ xfs_inodegc_init_percpu(
for_each_possible_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu);
#if defined(DEBUG) || defined(XFS_WARN)
gc->cpu = cpu;
#endif
init_llist_head(&gc->list);
gc->items = 0;
INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);