jbd2: continue to record log between each mount

For a newly mounted file system, the journal committing thread always
record new transactions from the start of the journal area, no matter
whether the journal was clean or just has been recovered. So the logdump
code in debugfs cannot dump continuous logs between each mount, it is
disadvantageous to analysis corrupted file system image and locate the
file system inconsistency bugs.

If we get a corrupted file system in the running products and want to
find out what has happened, besides lookup the system log, one effective
way is to backtrack the journal log. But we may not always run e2fsck
before each mount and the default fsck -a mode also cannot always
checkout all inconsistencies, so it could left over some inconsistencies
into the next mount until we detect it. Finally, transactions in the
journal may probably discontinuous and some relatively new transactions
has been covered, it becomes hard to analyse. If we could record
transactions continuously between each mount, we could acquire more
useful info from the journal. Like this:

 |Previous mount checkpointed/recovered logs|Current mount logs         |
 |{------}{---}{--------} ... {------}| ... |{======}{========}...000000|

And yes the journal area is limited and cannot record everything, the
problematic transaction may also be covered even if we do this, but
this is still useful for fuzzy tests and short-running products.

This patch save the head blocknr in the superblock after flushing the
journal or unmounting the file system, let the next mount could continue
to record new transaction behind it. This change is backward compatible
because the old kernel does not care about the head blocknr of the
journal. It is also fine if we mount a clean old image without valid
head blocknr, we fail back to set it to s_first just like before.
Finally, for the case of mount an unclean file system, we could also get
the journal head easily after scanning/replaying the journal, it will
continue to record new transaction after the recovered transactions.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20230322013353.1843306-2-yi.zhang@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
Zhang Yi 2023-03-22 09:33:51 +08:00 committed by Theodore Ts'o
parent 04c2e98179
commit c7fc605558
3 changed files with 40 additions and 9 deletions

View File

@ -1559,8 +1559,21 @@ static int journal_reset(journal_t *journal)
journal->j_first = first;
journal->j_last = last;
journal->j_head = journal->j_first;
journal->j_tail = journal->j_first;
if (journal->j_head != 0 && journal->j_flags & JBD2_CYCLE_RECORD) {
/*
* Disable the cycled recording mode if the journal head block
* number is not correct.
*/
if (journal->j_head < first || journal->j_head >= last) {
printk(KERN_WARNING "JBD2: Incorrect Journal head block %lu, "
"disable journal_cycle_record\n",
journal->j_head);
journal->j_head = journal->j_first;
}
} else {
journal->j_head = journal->j_first;
}
journal->j_tail = journal->j_head;
journal->j_free = journal->j_last - journal->j_first;
journal->j_tail_sequence = journal->j_transaction_sequence;
@ -1732,6 +1745,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, blk_opf_t write_flags)
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
sb->s_start = cpu_to_be32(0);
sb->s_head = cpu_to_be32(journal->j_head);
if (jbd2_has_feature_fast_commit(journal)) {
/*
* When journal is clean, no need to commit fast commit flag and

View File

@ -29,6 +29,7 @@ struct recovery_info
{
tid_t start_transaction;
tid_t end_transaction;
unsigned long head_block;
int nr_replays;
int nr_revokes;
@ -301,11 +302,11 @@ int jbd2_journal_recover(journal_t *journal)
* is always zero if, and only if, the journal was cleanly
* unmounted.
*/
if (!sb->s_start) {
jbd2_debug(1, "No recovery required, last transaction %d\n",
be32_to_cpu(sb->s_sequence));
jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n",
be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head));
journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
journal->j_head = be32_to_cpu(sb->s_head);
return 0;
}
@ -324,6 +325,9 @@ int jbd2_journal_recover(journal_t *journal)
/* Restart the log at the next transaction ID, thus invalidating
* any existing commit records in the log. */
journal->j_transaction_sequence = ++info.end_transaction;
journal->j_head = info.head_block;
jbd2_debug(1, "JBD2: last transaction %d, head block %lu\n",
journal->j_transaction_sequence, journal->j_head);
jbd2_journal_clear_revoke(journal);
err2 = sync_blockdev(journal->j_fs_dev);
@ -364,6 +368,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
if (err) {
printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
++journal->j_transaction_sequence;
journal->j_head = journal->j_first;
} else {
#ifdef CONFIG_JBD2_DEBUG
int dropped = info.end_transaction -
@ -373,6 +378,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
dropped, (dropped == 1) ? "" : "s");
#endif
journal->j_transaction_sequence = ++info.end_transaction;
journal->j_head = info.head_block;
}
journal->j_tail = 0;
@ -462,7 +468,7 @@ static int do_one_pass(journal_t *journal,
struct recovery_info *info, enum passtype pass)
{
unsigned int first_commit_ID, next_commit_ID;
unsigned long next_log_block;
unsigned long next_log_block, head_block;
int err, success = 0;
journal_superblock_t * sb;
journal_header_t * tmp;
@ -485,6 +491,7 @@ static int do_one_pass(journal_t *journal,
sb = journal->j_superblock;
next_commit_ID = be32_to_cpu(sb->s_sequence);
next_log_block = be32_to_cpu(sb->s_start);
head_block = next_log_block;
first_commit_ID = next_commit_ID;
if (pass == PASS_SCAN)
@ -809,6 +816,7 @@ static int do_one_pass(journal_t *journal,
if (commit_time < last_trans_commit_time)
goto ignore_crc_mismatch;
info->end_transaction = next_commit_ID;
info->head_block = head_block;
if (!jbd2_has_feature_async_commit(journal)) {
journal->j_failed_commit =
@ -817,8 +825,10 @@ static int do_one_pass(journal_t *journal,
break;
}
}
if (pass == PASS_SCAN)
if (pass == PASS_SCAN) {
last_trans_commit_time = commit_time;
head_block = next_log_block;
}
brelse(bh);
next_commit_ID++;
continue;
@ -868,6 +878,8 @@ static int do_one_pass(journal_t *journal,
if (pass == PASS_SCAN) {
if (!info->end_transaction)
info->end_transaction = next_commit_ID;
if (!info->head_block)
info->head_block = head_block;
} else {
/* It's really bad news if different passes end up at
* different places (but possible due to IO errors). */

View File

@ -265,8 +265,10 @@ typedef struct journal_superblock_s
__u8 s_padding2[3];
/* 0x0054 */
__be32 s_num_fc_blks; /* Number of fast commit blocks */
/* 0x0058 */
__u32 s_padding[41];
__be32 s_head; /* blocknr of head of log, only uptodate
* while the filesystem is clean */
/* 0x005C */
__u32 s_padding[40];
__be32 s_checksum; /* crc32c(superblock) */
/* 0x0100 */
@ -1395,6 +1397,9 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT)
#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file
* data write error in ordered
* mode */
#define JBD2_CYCLE_RECORD 0x080 /* Journal cycled record log on
* clean and empty filesystem
* logging area */
#define JBD2_FAST_COMMIT_ONGOING 0x100 /* Fast commit is ongoing */
#define JBD2_FULL_COMMIT_ONGOING 0x200 /* Full commit is ongoing */
#define JBD2_JOURNAL_FLUSH_DISCARD 0x0001