diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index c5a6c47d3df2..699092195f41 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -1302,6 +1302,9 @@ xchk_fsgates_enable( if (scrub_fsgates & XCHK_FSGATES_QUOTA) xfs_dqtrx_hook_enable(); + if (scrub_fsgates & XCHK_FSGATES_DIRENTS) + xfs_dir_hook_enable(); + sc->flags |= scrub_fsgates; } diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c index c899a50a83da..341ab737f9c5 100644 --- a/fs/xfs/scrub/nlinks.c +++ b/fs/xfs/scrub/nlinks.c @@ -43,8 +43,7 @@ int xchk_setup_nlinks( struct xfs_scrub *sc) { - /* Not ready for general consumption yet. */ - return -EOPNOTSUPP; + xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); sc->buf = kzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS); if (!sc->buf) @@ -63,6 +62,21 @@ xchk_setup_nlinks( * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED, * ECANCELED) that are absorbed into a scrub state flag update by * xchk_*_process_error. + * + * Because we are scanning a live filesystem, it's possible that another thread + * will try to update the link counts for an inode that we've already scanned. + * This will cause our counts to be incorrect. Therefore, we hook all + * directory entry updates because that is when link count updates occur. By + * shadowing transaction updates in this manner, live nlink check can ensure by + * locking the inode and the shadow structure that its own copies are not out + * of date. Because the hook code runs in a different process context from the + * scrub code and the scrub state flags are not accessed atomically, failures + * in the hook code must abort the iscan and the scrubber must notice the + * aborted scan and set the incomplete flag. + * + * Note that we use jump labels and srcu notifier hooks to minimize the + * overhead when live nlinks is /not/ running. Locking order for nlink + * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock. */ /* @@ -120,6 +134,63 @@ xchk_nlinks_update_incore( return error; } +/* + * Apply a link count change from the regular filesystem into our shadow link + * count structure based on a directory update in progress. + */ +STATIC int +xchk_nlinks_live_update( + struct notifier_block *nb, + unsigned long action, + void *data) +{ + struct xfs_dir_update_params *p = data; + struct xchk_nlink_ctrs *xnc; + int error; + + xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb); + + trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino, + p->delta, p->name->name, p->name->len); + + /* + * If we've already scanned @dp, update the number of parents that link + * to @ip. If @ip is a subdirectory, update the number of child links + * going out of @dp. + */ + if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) { + mutex_lock(&xnc->lock); + error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta, + 0, 0); + if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode)) + error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0, + 0, p->delta); + mutex_unlock(&xnc->lock); + if (error) + goto out_abort; + } + + /* + * If @ip is a subdirectory and we've already scanned it, update the + * number of backrefs pointing to @dp. + */ + if (S_ISDIR(VFS_IC(p->ip)->i_mode) && + xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) { + mutex_lock(&xnc->lock); + error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0, + p->delta, 0); + mutex_unlock(&xnc->lock); + if (error) + goto out_abort; + } + + return NOTIFY_DONE; + +out_abort: + xchk_iscan_abort(&xnc->collect_iscan); + return NOTIFY_DONE; +} + /* Bump the observed link count for the inode referenced by this entry. */ STATIC int xchk_nlinks_collect_dirent( @@ -747,6 +818,11 @@ xchk_nlinks_teardown_scan( { struct xchk_nlink_ctrs *xnc = priv; + /* Discourage any hook functions that might be running. */ + xchk_iscan_abort(&xnc->collect_iscan); + + xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook); + xfarray_destroy(xnc->nlinks); xnc->nlinks = NULL; @@ -793,6 +869,19 @@ xchk_nlinks_setup_scan( if (error) goto out_teardown; + /* + * Hook into the directory entry code so that we can capture updates to + * file link counts. The hook only triggers for inodes that were + * already scanned, and the scanner thread takes each inode's ILOCK, + * which means that any in-progress inode updates will finish before we + * can scan the inode. + */ + ASSERT(sc->flags & XCHK_FSGATES_DIRENTS); + xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update); + error = xfs_dir_hook_add(mp, &xnc->dhook); + if (error) + goto out_teardown; + /* Use deferred cleanup to pass the inode link count data to repair. */ sc->buf_cleanup = xchk_nlinks_teardown_scan; return 0; diff --git a/fs/xfs/scrub/nlinks.h b/fs/xfs/scrub/nlinks.h index 69a3460c5e52..ea6408e1c183 100644 --- a/fs/xfs/scrub/nlinks.h +++ b/fs/xfs/scrub/nlinks.h @@ -22,6 +22,12 @@ struct xchk_nlink_ctrs { */ struct xchk_iscan collect_iscan; struct xchk_iscan compare_iscan; + + /* + * Hook into directory updates so that we can receive live updates + * from other writer threads. + */ + struct xfs_dir_hook dhook; }; /* diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 8c60774d5f34..883c47b6c686 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -160,6 +160,9 @@ xchk_fsgates_disable( if (sc->flags & XCHK_FSGATES_QUOTA) xfs_dqtrx_hook_disable(); + if (sc->flags & XCHK_FSGATES_DIRENTS) + xfs_dir_hook_disable(); + sc->flags &= ~XCHK_FSGATES_ALL; } diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index de6b45f99dd5..f99a3c21d02e 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -122,6 +122,7 @@ struct xfs_scrub { #define XCHK_FSGATES_DRAIN (1U << 2) /* defer ops draining enabled */ #define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */ #define XCHK_FSGATES_QUOTA (1U << 4) /* quota live update enabled */ +#define XCHK_FSGATES_DIRENTS (1U << 5) /* directory live update enabled */ #define XREP_RESET_PERAG_RESV (1U << 30) /* must reset AG space reservation */ #define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */ @@ -132,7 +133,8 @@ struct xfs_scrub { * must be enabled during scrub setup and can only be torn down afterwards. */ #define XCHK_FSGATES_ALL (XCHK_FSGATES_DRAIN | \ - XCHK_FSGATES_QUOTA) + XCHK_FSGATES_QUOTA | \ + XCHK_FSGATES_DIRENTS) /* Metadata scrubbers */ int xchk_tester(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 0deea8f18a30..9512170ea9a7 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -116,6 +116,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_NLINKS); { XCHK_FSGATES_DRAIN, "fsgates_drain" }, \ { XCHK_NEED_DRAIN, "need_drain" }, \ { XCHK_FSGATES_QUOTA, "fsgates_quota" }, \ + { XCHK_FSGATES_DIRENTS, "fsgates_dirents" }, \ { XREP_RESET_PERAG_RESV, "reset_perag_resv" }, \ { XREP_ALREADY_FIXED, "already_fixed" } @@ -1363,6 +1364,38 @@ TRACE_EVENT(xchk_nlinks_collect_metafile, __entry->ino) ); +TRACE_EVENT(xchk_nlinks_live_update, + TP_PROTO(struct xfs_mount *mp, const struct xfs_inode *dp, + int action, xfs_ino_t ino, int delta, + const char *name, unsigned int namelen), + TP_ARGS(mp, dp, action, ino, delta, name, namelen), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dir) + __field(int, action) + __field(xfs_ino_t, ino) + __field(int, delta) + __field(unsigned int, namelen) + __dynamic_array(char, name, namelen) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->dir = dp ? dp->i_ino : NULLFSINO; + __entry->action = action; + __entry->ino = ino; + __entry->delta = delta; + __entry->namelen = namelen; + memcpy(__get_str(name), name, namelen); + ), + TP_printk("dev %d:%d dir 0x%llx ino 0x%llx nlink_delta %d name '%.*s'", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dir, + __entry->ino, + __entry->delta, + __entry->namelen, + __get_str(name)) +); + TRACE_EVENT(xchk_nlinks_check_zero, TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, const struct xchk_nlink *live), diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d6635d219527..e8845287debd 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -925,6 +925,81 @@ xfs_bumplink( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); } +#ifdef CONFIG_XFS_LIVE_HOOKS +/* + * Use a static key here to reduce the overhead of directory live update hooks. + * If the compiler supports jump labels, the static branch will be replaced by + * a nop sled when there are no hook users. Online fsck is currently the only + * caller, so this is a reasonable tradeoff. + * + * Note: Patching the kernel code requires taking the cpu hotplug lock. Other + * parts of the kernel allocate memory with that lock held, which means that + * XFS callers cannot hold any locks that might be used by memory reclaim or + * writeback when calling the static_branch_{inc,dec} functions. + */ +DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dir_hooks_switch); + +void +xfs_dir_hook_disable(void) +{ + xfs_hooks_switch_off(&xfs_dir_hooks_switch); +} + +void +xfs_dir_hook_enable(void) +{ + xfs_hooks_switch_on(&xfs_dir_hooks_switch); +} + +/* Call hooks for a directory update relating to a child dirent update. */ +inline void +xfs_dir_update_hook( + struct xfs_inode *dp, + struct xfs_inode *ip, + int delta, + const struct xfs_name *name) +{ + if (xfs_hooks_switched_on(&xfs_dir_hooks_switch)) { + struct xfs_dir_update_params p = { + .dp = dp, + .ip = ip, + .delta = delta, + .name = name, + }; + struct xfs_mount *mp = ip->i_mount; + + xfs_hooks_call(&mp->m_dir_update_hooks, 0, &p); + } +} + +/* Call the specified function during a directory update. */ +int +xfs_dir_hook_add( + struct xfs_mount *mp, + struct xfs_dir_hook *hook) +{ + return xfs_hooks_add(&mp->m_dir_update_hooks, &hook->dirent_hook); +} + +/* Stop calling the specified function during a directory update. */ +void +xfs_dir_hook_del( + struct xfs_mount *mp, + struct xfs_dir_hook *hook) +{ + xfs_hooks_del(&mp->m_dir_update_hooks, &hook->dirent_hook); +} + +/* Configure directory update hook functions. */ +void +xfs_dir_hook_setup( + struct xfs_dir_hook *hook, + notifier_fn_t mod_fn) +{ + xfs_hook_setup(&hook->dirent_hook, mod_fn); +} +#endif /* CONFIG_XFS_LIVE_HOOKS */ + int xfs_create( struct mnt_idmap *idmap, @@ -1035,6 +1110,12 @@ xfs_create( xfs_bumplink(tp, dp); } + /* + * Create ip with a reference from dp, and add '.' and '..' references + * if it's a directory. + */ + xfs_dir_update_hook(dp, ip, 1, name); + /* * If this is a synchronous mount, make sure that the * create transaction goes to disk before returning to @@ -1249,6 +1330,7 @@ xfs_link( xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); xfs_bumplink(tp, sip); + xfs_dir_update_hook(tdp, sip, 1, target_name); /* * If this is a synchronous mount, make sure that the @@ -2562,6 +2644,12 @@ xfs_remove( goto out_trans_cancel; } + /* + * Drop the link from dp to ip, and if ip was a directory, remove the + * '.' and '..' references since we freed the directory. + */ + xfs_dir_update_hook(dp, ip, -1, name); + /* * If this is a synchronous mount, make sure that the * remove transaction goes to disk before returning to @@ -2752,6 +2840,20 @@ xfs_cross_rename( } xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); + + /* + * Inform our hook clients that we've finished an exchange operation as + * follows: removed the source and target files from their directories; + * added the target to the source directory; and added the source to + * the target directory. All inodes are locked, so it's ok to model a + * rename this way so long as we say we deleted entries before we add + * new ones. + */ + xfs_dir_update_hook(dp1, ip1, -1, name1); + xfs_dir_update_hook(dp2, ip2, -1, name2); + xfs_dir_update_hook(dp1, ip2, 1, name1); + xfs_dir_update_hook(dp2, ip1, 1, name2); + return xfs_finish_rename(tp); out_trans_abort: @@ -3135,6 +3237,21 @@ retry: if (new_parent) xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); + /* + * Inform our hook clients that we've finished a rename operation as + * follows: removed the source and target files from their directories; + * that we've added the source to the target directory; and finally + * that we've added the whiteout, if there was one. All inodes are + * locked, so it's ok to model a rename this way so long as we say we + * deleted entries before we add new ones. + */ + if (target_ip) + xfs_dir_update_hook(target_dp, target_ip, -1, target_name); + xfs_dir_update_hook(src_dp, src_ip, -1, src_name); + xfs_dir_update_hook(target_dp, src_ip, 1, target_name); + if (wip) + xfs_dir_update_hook(src_dp, wip, 1, src_name); + error = xfs_finish_rename(tp); if (wip) xfs_irele(wip); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7bbdc7009e7d..ab46ffb3ac19 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -171,6 +171,12 @@ static inline struct inode *VFS_I(struct xfs_inode *ip) return &ip->i_vnode; } +/* convert from const xfs inode to const vfs inode */ +static inline const struct inode *VFS_IC(const struct xfs_inode *ip) +{ + return &ip->i_vnode; +} + /* * For regular files we only update the on-disk filesize when actually * writing data back to disk. Until then only the copy in the VFS inode @@ -626,4 +632,29 @@ bool xfs_ifork_zapped(const struct xfs_inode *ip, int whichfork); void xfs_inode_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, xfs_filblks_t *dblocks, xfs_filblks_t *rblocks); +struct xfs_dir_update_params { + const struct xfs_inode *dp; + const struct xfs_inode *ip; + const struct xfs_name *name; + int delta; +}; + +#ifdef CONFIG_XFS_LIVE_HOOKS +void xfs_dir_update_hook(struct xfs_inode *dp, struct xfs_inode *ip, + int delta, const struct xfs_name *name); + +struct xfs_dir_hook { + struct xfs_hook dirent_hook; +}; + +void xfs_dir_hook_disable(void); +void xfs_dir_hook_enable(void); + +int xfs_dir_hook_add(struct xfs_mount *mp, struct xfs_dir_hook *hook); +void xfs_dir_hook_del(struct xfs_mount *mp, struct xfs_dir_hook *hook); +void xfs_dir_hook_setup(struct xfs_dir_hook *hook, notifier_fn_t mod_fn); +#else +# define xfs_dir_update_hook(dp, ip, delta, name) ((void)0) +#endif /* CONFIG_XFS_LIVE_HOOKS */ + #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 503fe3c7edbf..e86dfe67894f 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -252,6 +252,9 @@ typedef struct xfs_mount { /* cpus that have inodes queued for inactivation */ struct cpumask m_inodegc_cpumask; + + /* Hook to feed dirent updates to an active online repair. */ + struct xfs_hooks m_dir_update_hooks; } xfs_mount_t; #define M_IGEO(mp) (&(mp)->m_ino_geo) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index b31652fa7004..74e87ed5eee1 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2011,6 +2011,8 @@ static int xfs_init_fs_context( mp->m_logbsize = -1; mp->m_allocsize_log = 16; /* 64k */ + xfs_hooks_init(&mp->m_dir_update_hooks); + fc->s_fs_info = mp; fc->ops = &xfs_context_ops; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index c2dc8c501bdc..e73692fbe179 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -322,6 +322,7 @@ xfs_symlink( goto out_trans_cancel; xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); + xfs_dir_update_hook(dp, ip, 1, link_name); /* * If this is a synchronous mount, make sure that the