xfs: repair the rmapbt

Rebuild the reverse mapping btree from all primary metadata.  This first
patch establishes the bare mechanics of finding records and putting
together a new ondisk tree; more complex pieces are needed to make it
work properly.

Link: Documentation/filesystems/xfs-online-fsck-design.rst
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
Darrick J. Wong 2024-02-22 12:43:38 -08:00
parent e4fd1def30
commit 32080a9b9b
18 changed files with 1608 additions and 19 deletions

View File

@ -201,6 +201,7 @@ xfs-y += $(addprefix scrub/, \
reap.o \
refcount_repair.o \
repair.o \
rmap_repair.o \
)
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \

View File

@ -90,6 +90,7 @@ struct xfs_perag {
uint8_t pagf_repair_bno_level;
uint8_t pagf_repair_cnt_level;
uint8_t pagf_repair_refcount_level;
uint8_t pagf_repair_rmap_level;
#endif
spinlock_t pag_state_lock;

View File

@ -6379,3 +6379,46 @@ xfs_bunmapi_range(
out:
return error;
}
/*
 * Context handed through the generic btree query code to
 * xfs_bmap_query_range_helper, which unpacks it again to reach the caller's
 * callback.
 */
struct xfs_bmap_query_range {
/* Caller's callback, invoked by the helper for each validated mapping. */
xfs_bmap_query_range_fn fn;
/* Opaque caller data, passed unchanged as the last argument to @fn. */
void *priv;
};
/* Format btree record and pass to our callback. */
STATIC int
xfs_bmap_query_range_helper(
struct xfs_btree_cur *cur,
const union xfs_btree_rec *rec,
void *priv)
{
struct xfs_bmap_query_range *query = priv;
struct xfs_bmbt_irec irec;
xfs_failaddr_t fa;
xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
fa = xfs_bmap_validate_extent(cur->bc_ino.ip, cur->bc_ino.whichfork,
&irec);
if (fa) {
xfs_btree_mark_sick(cur);
return xfs_bmap_complain_bad_rec(cur->bc_ino.ip,
cur->bc_ino.whichfork, fa, &irec);
}
return query->fn(cur, &irec, query->priv);
}
/*
 * Find all bmaps: run a query-all over the btree behind @cur, routing each
 * record through xfs_bmap_query_range_helper so that @fn receives it along
 * with @priv.  Returns whatever xfs_btree_query_all returns.
 */
int
xfs_bmap_query_all(
	struct xfs_btree_cur		*cur,
	xfs_bmap_query_range_fn		fn,
	void				*priv)
{
	struct xfs_bmap_query_range	ctx;

	ctx.fn = fn;
	ctx.priv = priv;

	return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &ctx);
}

View File

@ -280,4 +280,12 @@ extern struct kmem_cache *xfs_bmap_intent_cache;
int __init xfs_bmap_intent_init_cache(void);
void xfs_bmap_intent_destroy_cache(void);
/*
 * Callback type for xfs_bmap_query_all.  Receives the btree cursor, one
 * mapping record, and the caller's private pointer; returns an int status.
 */
typedef int (*xfs_bmap_query_range_fn)(
struct xfs_btree_cur *cur,
struct xfs_bmbt_irec *rec,
void *priv);
/* Query all records via @cur, passing @priv through to the callback @fn. */
int xfs_bmap_query_all(struct xfs_btree_cur *cur, xfs_bmap_query_range_fn fn,
void *priv);
#endif /* __XFS_BMAP_H__ */

View File

@ -215,10 +215,10 @@ xfs_rmap_btrec_to_irec(
/* Simple checks for rmap records. */
xfs_failaddr_t
xfs_rmap_check_irec(
struct xfs_btree_cur *cur,
struct xfs_perag *pag,
const struct xfs_rmap_irec *irec)
{
struct xfs_mount *mp = cur->bc_mp;
struct xfs_mount *mp = pag->pag_mount;
bool is_inode;
bool is_unwritten;
bool is_bmbt;
@ -233,8 +233,8 @@ xfs_rmap_check_irec(
return __this_address;
} else {
/* check for valid extent range, including overflow */
if (!xfs_verify_agbext(cur->bc_ag.pag, irec->rm_startblock,
irec->rm_blockcount))
if (!xfs_verify_agbext(pag, irec->rm_startblock,
irec->rm_blockcount))
return __this_address;
}
@ -307,7 +307,7 @@ xfs_rmap_get_rec(
fa = xfs_rmap_btrec_to_irec(rec, irec);
if (!fa)
fa = xfs_rmap_check_irec(cur, irec);
fa = xfs_rmap_check_irec(cur->bc_ag.pag, irec);
if (fa)
return xfs_rmap_complain_bad_rec(cur, fa, irec);
@ -2442,7 +2442,7 @@ xfs_rmap_query_range_helper(
fa = xfs_rmap_btrec_to_irec(rec, &irec);
if (!fa)
fa = xfs_rmap_check_irec(cur, &irec);
fa = xfs_rmap_check_irec(cur->bc_ag.pag, &irec);
if (fa)
return xfs_rmap_complain_bad_rec(cur, fa, &irec);

View File

@ -195,7 +195,7 @@ int xfs_rmap_compare(const struct xfs_rmap_irec *a,
union xfs_btree_rec;
xfs_failaddr_t xfs_rmap_btrec_to_irec(const union xfs_btree_rec *rec,
struct xfs_rmap_irec *irec);
xfs_failaddr_t xfs_rmap_check_irec(struct xfs_btree_cur *cur,
xfs_failaddr_t xfs_rmap_check_irec(struct xfs_perag *pag,
const struct xfs_rmap_irec *irec);
int xfs_rmap_has_records(struct xfs_btree_cur *cur, xfs_agblock_t bno,

View File

@ -342,7 +342,18 @@ xfs_rmapbt_verify(
level = be16_to_cpu(block->bb_level);
if (pag && xfs_perag_initialised_agf(pag)) {
if (level >= pag->pagf_rmap_level)
unsigned int maxlevel = pag->pagf_rmap_level;
#ifdef CONFIG_XFS_ONLINE_REPAIR
/*
* Online repair could be rewriting the reverse mapping btree, so
* we'll validate against the larger of either tree while this
* is going on.
*/
maxlevel = max_t(unsigned int, maxlevel,
pag->pagf_repair_rmap_level);
#endif
if (level >= maxlevel)
return __this_address;
} else if (level >= mp->m_rmap_maxlevels)
return __this_address;

View File

@ -460,7 +460,7 @@ xchk_perag_read_headers(
* Grab the AG headers for the attached perag structure and wait for pending
* intents to drain.
*/
static int
int
xchk_perag_drain_and_lock(
struct xfs_scrub *sc)
{

View File

@ -134,6 +134,7 @@ int xchk_setup_nlinks(struct xfs_scrub *sc);
void xchk_ag_free(struct xfs_scrub *sc, struct xchk_ag *sa);
int xchk_ag_init(struct xfs_scrub *sc, xfs_agnumber_t agno,
struct xchk_ag *sa);
int xchk_perag_drain_and_lock(struct xfs_scrub *sc);
/*
* Grab all AG resources, treating the inability to grab the perag structure as

View File

@ -239,7 +239,11 @@ xrep_newbt_alloc_ag_blocks(
xrep_newbt_validate_ag_alloc_hint(xnr);
error = xfs_alloc_vextent_near_bno(&args, xnr->alloc_hint);
if (xnr->alloc_vextent)
error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
else
error = xfs_alloc_vextent_near_bno(&args,
xnr->alloc_hint);
if (error)
return error;
if (args.fsbno == NULLFSBLOCK)
@ -309,7 +313,11 @@ xrep_newbt_alloc_file_blocks(
xrep_newbt_validate_file_alloc_hint(xnr);
error = xfs_alloc_vextent_start_ag(&args, xnr->alloc_hint);
if (xnr->alloc_vextent)
error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
else
error = xfs_alloc_vextent_start_ag(&args,
xnr->alloc_hint);
if (error)
return error;
if (args.fsbno == NULLFSBLOCK)

View File

@ -6,6 +6,8 @@
#ifndef __XFS_SCRUB_NEWBT_H__
#define __XFS_SCRUB_NEWBT_H__
struct xfs_alloc_arg;
struct xrep_newbt_resv {
/* Link to list of extents that we've reserved. */
struct list_head list;
@ -28,6 +30,11 @@ struct xrep_newbt_resv {
struct xrep_newbt {
struct xfs_scrub *sc;
/* Custom allocation function, or NULL for xfs_alloc_vextent */
int (*alloc_vextent)(struct xfs_scrub *sc,
struct xfs_alloc_arg *args,
xfs_fsblock_t alloc_hint);
/* List of extents that we've reserved. */
struct list_head resv_list;

View File

@ -114,7 +114,7 @@ xreap_put_freelist(
int error;
/* Make sure there's space on the freelist. */
error = xrep_fix_freelist(sc, true);
error = xrep_fix_freelist(sc, 0);
if (error)
return error;

View File

@ -401,7 +401,7 @@ xrep_calc_ag_resblks(
int
xrep_fix_freelist(
struct xfs_scrub *sc,
bool can_shrink)
int alloc_flags)
{
struct xfs_alloc_arg args = {0};
@ -411,8 +411,7 @@ xrep_fix_freelist(
args.alignment = 1;
args.pag = sc->sa.pag;
return xfs_alloc_fix_freelist(&args,
can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK);
return xfs_alloc_fix_freelist(&args, alloc_flags);
}
/*

View File

@ -51,7 +51,7 @@ struct xbitmap;
struct xagb_bitmap;
struct xfsb_bitmap;
int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink);
int xrep_fix_freelist(struct xfs_scrub *sc, int alloc_flags);
struct xrep_find_ag_btree {
/* in: rmap owner of the btree we're looking for */
@ -86,6 +86,7 @@ int xrep_ino_ensure_extent_count(struct xfs_scrub *sc, int whichfork,
int xrep_reset_perag_resv(struct xfs_scrub *sc);
int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten);
int xrep_metadata_inode_forks(struct xfs_scrub *sc);
int xrep_setup_ag_rmapbt(struct xfs_scrub *sc);
/* Repair setup functions */
int xrep_setup_ag_allocbt(struct xfs_scrub *sc);
@ -111,6 +112,7 @@ int xrep_agfl(struct xfs_scrub *sc);
int xrep_agi(struct xfs_scrub *sc);
int xrep_allocbt(struct xfs_scrub *sc);
int xrep_iallocbt(struct xfs_scrub *sc);
int xrep_rmapbt(struct xfs_scrub *sc);
int xrep_refcountbt(struct xfs_scrub *sc);
int xrep_inode(struct xfs_scrub *sc);
int xrep_bmap_data(struct xfs_scrub *sc);
@ -177,6 +179,7 @@ xrep_setup_nothing(
return 0;
}
#define xrep_setup_ag_allocbt xrep_setup_nothing
#define xrep_setup_ag_rmapbt xrep_setup_nothing
#define xrep_setup_inode(sc, imap) ((void)0)
@ -190,6 +193,7 @@ xrep_setup_nothing(
#define xrep_agi xrep_notsupported
#define xrep_allocbt xrep_notsupported
#define xrep_iallocbt xrep_notsupported
#define xrep_rmapbt xrep_notsupported
#define xrep_refcountbt xrep_notsupported
#define xrep_inode xrep_notsupported
#define xrep_bmap_data xrep_notsupported

View File

@ -25,6 +25,7 @@
#include "scrub/btree.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/repair.h"
/*
* Set us up to scrub reverse mapping btrees.
@ -36,6 +37,14 @@ xchk_setup_ag_rmapbt(
if (xchk_need_intent_drain(sc))
xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
if (xchk_could_repair(sc)) {
int error;
error = xrep_setup_ag_rmapbt(sc);
if (error)
return error;
}
return xchk_setup_ag_btree(sc, false);
}
@ -349,7 +358,7 @@ xchk_rmapbt_rec(
struct xfs_rmap_irec irec;
if (xfs_rmap_btrec_to_irec(rec, &irec) != NULL ||
xfs_rmap_check_irec(bs->cur, &irec) != NULL) {
xfs_rmap_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return 0;
}

1466
fs/xfs/scrub/rmap_repair.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -278,7 +278,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.setup = xchk_setup_ag_rmapbt,
.scrub = xchk_rmapbt,
.has = xfs_has_rmapbt,
.repair = xrep_notsupported,
.repair = xrep_rmapbt,
},
[XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */
.type = ST_PERAG,

View File

@ -1595,7 +1595,6 @@ DEFINE_EVENT(xrep_rmap_class, name, \
uint64_t owner, uint64_t offset, unsigned int flags), \
TP_ARGS(mp, agno, agbno, len, owner, offset, flags))
DEFINE_REPAIR_RMAP_EVENT(xrep_ibt_walk_rmap);
DEFINE_REPAIR_RMAP_EVENT(xrep_rmap_extent_fn);
DEFINE_REPAIR_RMAP_EVENT(xrep_bmap_walk_rmap);
TRACE_EVENT(xrep_abt_found,
@ -1713,6 +1712,38 @@ TRACE_EVENT(xrep_bmap_found,
__entry->state)
);
/*
 * Trace a single reverse mapping record.  Captures the device number of the
 * mount, the AG number supplied by the caller, and the record's start block,
 * block count, owner, offset and flags.
 */
TRACE_EVENT(xrep_rmap_found,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
const struct xfs_rmap_irec *rec),
TP_ARGS(mp, agno, rec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
__field(uint64_t, owner)
__field(uint64_t, offset)
__field(unsigned int, flags)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
__entry->agbno = rec->rm_startblock;
__entry->len = rec->rm_blockcount;
__entry->owner = rec->rm_owner;
__entry->offset = rec->rm_offset;
__entry->flags = rec->rm_flags;
),
TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
__entry->len,
__entry->owner,
__entry->offset,
__entry->flags)
);
TRACE_EVENT(xrep_findroot_block,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
uint32_t magic, uint16_t level),