mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-27 12:57:53 +00:00
3324249e6e
commit e6fff81e48
upstream. When we replay unfinished intent items that have been recovered from the log, it's possible that the replay will cause the creation of more deferred work items. As outlined in commit 509955823c
("xfs: log recovery should replay deferred ops in order"), later work items have an implicit ordering dependency on earlier work items. Therefore, recovery must replay the items (both recovered and created) in the same order that they would have been during normal operation. For log recovery, we enforce this ordering by using an empty transaction to collect deferred ops that get created in the process of recovering a log intent item to prevent them from being committed before the rest of the recovered intent items. After we finish committing all the recovered log items, we allocate a transaction with an enormous block reservation, splice our huge list of created deferred ops into that transaction, and commit it, thereby finishing all those ops. This is /really/ hokey -- it's the one place in XFS where we allow nested transactions; the splicing of the defer ops list is inelegant and has to be done twice per recovery function; and the broken way we handle inode pointers and block reservations causes subtle use-after-free and allocator problems that will be fixed by this patch and the two patches after it. Therefore, replace the hokey empty transaction with a structure designed to capture each chain of deferred ops that are created as part of recovering a single unfinished log intent. Finally, refactor the loop that replays those chains to do so using one transaction per chain. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Chandan Babu R <chandan.babu@oracle.com> Acked-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
88 lines
2.9 KiB
C
88 lines
2.9 KiB
C
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
|
|
#ifndef __XFS_RMAP_ITEM_H__
|
|
#define __XFS_RMAP_ITEM_H__
|
|
|
|
/*
 * There are (currently) three pairs of rmap btree redo item types: map, unmap,
 * and convert. The common abbreviations for these are RUI (rmap update
 * intent) and RUD (rmap update done). The redo item type is encoded in the
 * flags field of each xfs_map_extent.
 *
 * *I items should be recorded in the *first* of a series of rolled
 * transactions, and the *D items should be recorded in the same transaction
 * that records the associated rmapbt updates. Typically, the first
 * transaction will record a bmbt update, followed by some number of
 * transactions containing rmapbt updates, and finally transactions with any
 * bnobt/cntbt updates.
 *
 * Should the system crash after the commit of the first transaction but
 * before the commit of the final transaction in a series, log recovery will
 * use the redo information recorded by the intent items to replay the
 * (rmapbt/bnobt/cntbt) metadata updates in the non-first transaction.
 */
|
|
|
|
/* kernel only RUI/RUD definitions */
|
|
|
|
/*
 * Forward declarations: this header only uses pointers to these types, so
 * their full definitions are not needed here.
 */
struct xfs_mount;
struct kmem_zone;
|
|
|
|
/*
 * Max number of extents in fast allocation path.
 */
#define	XFS_RUI_MAX_FAST_EXTENTS	16
|
|
|
|
/*
 * Define RUI flag bits. Manipulated by set/clear/test_bit operators, so the
 * value below is a bit number rather than a mask.
 */
#define	XFS_RUI_RECOVERED		1
|
|
|
|
/*
|
|
* This is the "rmap update intent" log item. It is used to log the fact that
|
|
* some reverse mappings need to change. It is used in conjunction with the
|
|
* "rmap update done" log item described below.
|
|
*
|
|
* These log items follow the same rules as struct xfs_efi_log_item; see the
|
|
* comments about that structure (in xfs_extfree_item.h) for more details.
|
|
*/
|
|
struct xfs_rui_log_item {
|
|
struct xfs_log_item rui_item;
|
|
atomic_t rui_refcount;
|
|
atomic_t rui_next_extent;
|
|
unsigned long rui_flags; /* misc flags */
|
|
struct xfs_rui_log_format rui_format;
|
|
};
|
|
|
|
static inline size_t
|
|
xfs_rui_log_item_sizeof(
|
|
unsigned int nr)
|
|
{
|
|
return offsetof(struct xfs_rui_log_item, rui_format) +
|
|
xfs_rui_log_format_sizeof(nr);
|
|
}
|
|
|
|
/*
|
|
* This is the "rmap update done" log item. It is used to log the fact that
|
|
* some rmapbt updates mentioned in an earlier rui item have been performed.
|
|
*/
|
|
struct xfs_rud_log_item {
|
|
struct xfs_log_item rud_item;
|
|
struct xfs_rui_log_item *rud_ruip;
|
|
struct xfs_rud_log_format rud_format;
|
|
};
|
|
|
|
/* Declared here only; the definitions are not in this header. */
extern struct kmem_zone	*xfs_rui_zone;
extern struct kmem_zone	*xfs_rud_zone;
|
|
|
|
/*
 * RUI log item interfaces. Only the prototypes are visible in this header;
 * see the implementation for the exact contracts.
 */
struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
int xfs_rui_copy_format(struct xfs_log_iovec *buf,
		struct xfs_rui_log_format *dst_rui_fmt);
void xfs_rui_item_free(struct xfs_rui_log_item *ruip);
void xfs_rui_release(struct xfs_rui_log_item *ruip);
int xfs_rui_recover(struct xfs_rui_log_item *ruip,
		struct list_head *capture_list);
|
|
|
|
#endif /* __XFS_RMAP_ITEM_H__ */
|