From d8601afca840d36203d0cf2da94ce4f92003956e Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Mon, 27 Dec 2021 23:10:06 -0500
Subject: [PATCH] bcachefs: Simplify journal replay

With BTREE_ITER_WITH_JOURNAL, there are no longer any restrictions on
the order in which we have to replay keys from the journal, and we can
also start up journal reclaim right away - and delete a bunch of code.

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/alloc_background.c      |   3 +-
 fs/bcachefs/bcachefs.h              |   2 -
 fs/bcachefs/btree_key_cache.h       |   3 +-
 fs/bcachefs/btree_update_interior.c |   5 +-
 fs/bcachefs/btree_update_leaf.c     |   3 -
 fs/bcachefs/journal_reclaim.c       |   5 --
 fs/bcachefs/journal_types.h         |   1 -
 fs/bcachefs/recovery.c              | 114 +++++-----------------
 8 files changed, 22 insertions(+), 114 deletions(-)

diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index cb4b059e796c..ab7d972aac3a 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -902,8 +902,7 @@ static void discard_one_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
 static bool allocator_thread_running(struct bch_dev *ca)
 {
 	unsigned state = ca->mi.state == BCH_MEMBER_STATE_rw &&
-		test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags) &&
-		test_bit(BCH_FS_ALLOC_REPLAY_DONE, &ca->fs->flags)
+		test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags)
 		? ALLOCATOR_running
 		: ALLOCATOR_stopped;
 	alloc_thread_set_state(ca, state);
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 431cf25b38db..7771b4a4bb87 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -510,8 +510,6 @@ enum {
 	BCH_FS_INITIAL_GC_DONE,
 	BCH_FS_INITIAL_GC_UNFIXED,
 	BCH_FS_TOPOLOGY_REPAIR_DONE,
-	BCH_FS_ALLOC_REPLAY_DONE,
-	BCH_FS_BTREE_INTERIOR_REPLAY_DONE,
 	BCH_FS_FSCK_DONE,
 	BCH_FS_STARTED,
 	BCH_FS_RW,
diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h
index 0768ef3ca776..b3d241b13453 100644
--- a/fs/bcachefs/btree_key_cache.h
+++ b/fs/bcachefs/btree_key_cache.h
@@ -16,8 +16,7 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
 	size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
 	size_t max_dirty = 4096 + (nr_keys * 3) / 4;
 
-	return nr_dirty > max_dirty &&
-		test_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
+	return nr_dirty > max_dirty;
 }
 
 int bch2_btree_key_cache_journal_flush(struct journal *,
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 17111c4228bd..51a2ea2c5cd6 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -45,7 +45,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
 
 	BUG_ON(!b->c.level);
 
-	if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
+	if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
 		return;
 
 	bch2_btree_node_iter_init_from_start(&iter, b);
@@ -1851,9 +1851,6 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
 {
 	struct async_btree_rewrite *a;
 
-	if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
-		return;
-
 	if (!percpu_ref_tryget(&c->writes))
 		return;
 
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index e95940ffad6b..1072acb0c9af 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -206,9 +206,6 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
 	int old_live_u64s = b->nr.live_u64s;
 	int live_u64s_added, u64s_added;
 
-	EBUG_ON(!insert->level &&
-		!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags));
-
 	if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b,
 					&insert_l(insert)->iter, insert->k)))
 		return false;
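Note on the btree_key_cache.h hunk above: the JOURNAL_RECLAIM_STARTED
test existed because making writers wait on dirty key cache entries is
only safe once something is flushing those entries; now that reclaim
runs from journal startup, the dirty-key threshold alone is a safe wait
condition. A standalone sketch of the surviving logic (illustrative
numbers and names, not bcachefs API):

  #include <stdbool.h>
  #include <stddef.h>
  #include <stdio.h>

  /* Model of bch2_btree_key_cache_must_wait() after this patch: the
   * threshold alone decides, with no "has reclaim started?" test. */
  static bool key_cache_must_wait(size_t nr_dirty, size_t nr_keys)
  {
  	size_t max_dirty = 4096 + (nr_keys * 3) / 4;

  	return nr_dirty > max_dirty;
  }

  int main(void)
  {
  	/* 100000 cached keys -> throttle past 4096 + 75000 = 79096: */
  	printf("%d\n", key_cache_must_wait(80000, 100000));	/* 1 */
  	printf("%d\n", key_cache_must_wait(79000, 100000));	/* 0 */
  	return 0;
  }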
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 4462beb52461..d72b17dc935a 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -489,9 +489,6 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
 	u64 seq;
 	int err;
 
-	if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
-		return 0;
-
 	lockdep_assert_held(&j->reclaim_lock);
 
 	while (1) {
@@ -689,8 +686,6 @@ static int bch2_journal_reclaim_thread(void *arg)
 
 	set_freezable();
 
-	kthread_wait_freezable(test_bit(JOURNAL_RECLAIM_STARTED, &j->flags));
-
 	j->last_flushed = jiffies;
 
 	while (!ret && !kthread_should_stop()) {
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 0c4df603280d..73e7fbc4f109 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -148,7 +148,6 @@ enum journal_space_from {
 enum {
 	JOURNAL_REPLAY_DONE,
 	JOURNAL_STARTED,
-	JOURNAL_RECLAIM_STARTED,
 	JOURNAL_NEED_WRITE,
 	JOURNAL_MAY_GET_UNRESERVED,
 	JOURNAL_MAY_SKIP_FLUSH,
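Note on the journal_reclaim.c hunks above: the deleted
kthread_wait_freezable() call was a one-shot startup handshake - the
reclaim thread parked itself until recovery set JOURNAL_RECLAIM_STARTED.
A userspace model of the pattern being removed, using pthreads in place
of kthreads (illustrative only, none of these names are bcachefs API):

  #include <pthread.h>
  #include <stdbool.h>
  #include <stdio.h>

  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
  static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
  static bool reclaim_started;	/* the JOURNAL_RECLAIM_STARTED analogue */

  static void *reclaim_thread(void *arg)
  {
  	/* Old behaviour: park until recovery opens the gate. */
  	pthread_mutex_lock(&lock);
  	while (!reclaim_started)
  		pthread_cond_wait(&cond, &lock);
  	pthread_mutex_unlock(&lock);

  	puts("reclaim running");	/* after this patch: runs right away */
  	return arg;
  }

  int main(void)
  {
  	pthread_t t;

  	pthread_create(&t, NULL, reclaim_thread, NULL);

  	/* Recovery's set_bit() + wakeup, previously done only after
  	 * interior btree node updates had been replayed: */
  	pthread_mutex_lock(&lock);
  	reclaim_started = true;
  	pthread_cond_broadcast(&cond);
  	pthread_mutex_unlock(&lock);

  	pthread_join(t, NULL);
  	return 0;
  }

With BTREE_ITER_WITH_JOURNAL there is nothing left for this handshake to
protect, so both the flag and the wait go away.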
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 57311ad283c7..cb0ba84711aa 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -474,8 +474,8 @@ static void replay_now_at(struct journal *j, u64 seq)
 		bch2_journal_pin_put(j, j->replay_journal_seq++);
 }
 
-static int __bch2_journal_replay_key(struct btree_trans *trans,
-				     struct journal_key *k)
+static int bch2_journal_replay_key(struct btree_trans *trans,
+				   struct journal_key *k)
 {
 	struct btree_iter iter;
 	unsigned iter_flags =
@@ -484,7 +484,7 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
 	int ret;
 
 	if (!k->level && k->btree_id == BTREE_ID_alloc)
-		iter_flags |= BTREE_ITER_CACHED|BTREE_ITER_CACHED_NOFILL;
+		iter_flags |= BTREE_ITER_CACHED;
 
 	bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
 				  BTREE_MAX_DEPTH, k->level,
@@ -503,29 +503,12 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
 	return ret;
 }
 
-static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k)
-{
-	unsigned commit_flags =
-		BTREE_INSERT_LAZY_RW|
-		BTREE_INSERT_NOFAIL|
-		BTREE_INSERT_JOURNAL_RESERVED;
-
-	if (!k->allocated)
-		commit_flags |= BTREE_INSERT_JOURNAL_REPLAY;
-
-	return bch2_trans_do(c, NULL, NULL, commit_flags,
-			     __bch2_journal_replay_key(&trans, k));
-}
-
 static int journal_sort_seq_cmp(const void *_l, const void *_r)
 {
 	const struct journal_key *l = *((const struct journal_key **)_l);
 	const struct journal_key *r = *((const struct journal_key **)_r);
 
-	return cmp_int(r->level, l->level) ?:
-		cmp_int(l->journal_seq, r->journal_seq) ?:
-		cmp_int(l->btree_id, r->btree_id) ?:
-		bpos_cmp(l->k->k.p, r->k->k.p);
+	return cmp_int(l->journal_seq, r->journal_seq);
 }
 
 static int bch2_journal_replay(struct bch_fs *c)
@@ -533,10 +516,7 @@ static int bch2_journal_replay(struct bch_fs *c)
 	struct journal_keys *keys = &c->journal_keys;
 	struct journal_key **keys_sorted, *k;
 	struct journal *j = &c->journal;
-	struct bch_dev *ca;
-	unsigned idx;
 	size_t i;
-	u64 seq;
 	int ret;
 
 	keys_sorted = kmalloc_array(sizeof(*keys_sorted), keys->nr, GFP_KERNEL);
@@ -555,73 +535,25 @@ static int bch2_journal_replay(struct bch_fs *c)
 		replay_now_at(j, keys->journal_seq_base);
 	}
 
-	seq = j->replay_journal_seq;
-
-	/*
-	 * First replay updates to the alloc btree - these will only update the
-	 * btree key cache:
-	 */
 	for (i = 0; i < keys->nr; i++) {
 		k = keys_sorted[i];
 
 		cond_resched();
 
-		if (!k->level && k->btree_id == BTREE_ID_alloc) {
-			j->replay_journal_seq = keys->journal_seq_base + k->journal_seq;
-			ret = bch2_journal_replay_key(c, k);
-			if (ret)
-				goto err;
-		}
-	}
+		if (!k->allocated)
+			replay_now_at(j, keys->journal_seq_base + k->journal_seq);
 
-	/* Now we can start the allocator threads: */
-	set_bit(BCH_FS_ALLOC_REPLAY_DONE, &c->flags);
-	for_each_member_device(ca, c, idx)
-		bch2_wake_allocator(ca);
-
-	/*
-	 * Next replay updates to interior btree nodes:
-	 */
-	for (i = 0; i < keys->nr; i++) {
-		k = keys_sorted[i];
-
-		cond_resched();
-
-		if (k->level) {
-			j->replay_journal_seq = keys->journal_seq_base + k->journal_seq;
-			ret = bch2_journal_replay_key(c, k);
-			if (ret)
-				goto err;
-		}
-	}
-
-	/*
-	 * Now that the btree is in a consistent state, we can start journal
-	 * reclaim (which will be flushing entries from the btree key cache back
-	 * to the btree:
-	 */
-	set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
-	set_bit(JOURNAL_RECLAIM_STARTED, &j->flags);
-	journal_reclaim_kick(j);
-
-	j->replay_journal_seq = seq;
-
-	/*
-	 * Now replay leaf node updates:
-	 */
-	for (i = 0; i < keys->nr; i++) {
-		k = keys_sorted[i];
-
-		cond_resched();
-
-		if (k->level || k->btree_id == BTREE_ID_alloc)
-			continue;
-
-		replay_now_at(j, keys->journal_seq_base + k->journal_seq);
-
-		ret = bch2_journal_replay_key(c, k);
-		if (ret)
+		ret = bch2_trans_do(c, NULL, NULL,
+				    BTREE_INSERT_LAZY_RW|
+				    BTREE_INSERT_NOFAIL|
+				    BTREE_INSERT_JOURNAL_RESERVED|
+				    (!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0),
+				    bch2_journal_replay_key(&trans, k));
+		if (ret) {
+			bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
+				ret, bch2_btree_ids[k->btree_id], k->level);
 			goto err;
+		}
 	}
 
 	replay_now_at(j, j->replay_journal_seq_end);
@@ -629,14 +561,9 @@ static int bch2_journal_replay(struct bch_fs *c)
 	bch2_journal_set_replay_done(j);
 	bch2_journal_flush_all_pins(j);
 
-	kfree(keys_sorted);
-
-	return bch2_journal_error(j);
+	ret = bch2_journal_error(j);
 err:
-	bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
-		ret, bch2_btree_ids[k->btree_id], k->level);
 	kfree(keys_sorted);
-
 	return ret;
 }
 
@@ -1215,7 +1142,8 @@ int bch2_fs_recovery(struct bch_fs *c)
 	ret = bch2_journal_replay(c);
 	if (ret)
 		goto err;
-	bch_verbose(c, "journal replay done");
+	if (c->opts.verbose || !c->sb.clean)
+		bch_info(c, "journal replay done");
 
 	if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
 	    !c->opts.nochanges) {
@@ -1385,10 +1313,6 @@ int bch2_fs_initialize(struct bch_fs *c)
 	for (i = 0; i < BTREE_ID_NR; i++)
 		bch2_btree_root_alloc(c, i);
 
-	set_bit(BCH_FS_ALLOC_REPLAY_DONE, &c->flags);
-	set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
-	set_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
-
 	err = "unable to allocate journal buckets";
 	for_each_online_member(ca, c, i) {
 		ret = bch2_dev_journal_alloc(ca);
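Note on the recovery.c changes above: journal_sort_seq_cmp() now orders
keys by journal_seq alone, and bch2_journal_replay() is a single pass
that advances the replay cursor (releasing journal pins via
replay_now_at()) as it goes - the three phase-ordered loops are gone. A
userspace model of the sort-then-single-pass shape (illustrative types,
not bcachefs API):

  #include <stdio.h>
  #include <stdlib.h>

  struct journal_key {
  	unsigned long	journal_seq;
  	const char	*payload;
  };

  /* Like the patched journal_sort_seq_cmp(): journal_seq only. */
  static int journal_sort_seq_cmp(const void *_l, const void *_r)
  {
  	const struct journal_key *l = *(const struct journal_key **)_l;
  	const struct journal_key *r = *(const struct journal_key **)_r;

  	/* open-coded cmp_int(): */
  	return (l->journal_seq > r->journal_seq) -
  	       (l->journal_seq < r->journal_seq);
  }

  int main(void)
  {
  	struct journal_key keys[] = {
  		{ 7, "interior node update" },
  		{ 3, "alloc key" },
  		{ 5, "leaf update" },
  	};
  	const struct journal_key *sorted[3];
  	size_t i;

  	for (i = 0; i < 3; i++)
  		sorted[i] = &keys[i];
  	qsort(sorted, 3, sizeof(sorted[0]), journal_sort_seq_cmp);

  	/* One pass in journal order - no per-btree-level phases: */
  	for (i = 0; i < 3; i++)
  		printf("replay seq %lu: %s\n",
  		       sorted[i]->journal_seq, sorted[i]->payload);
  	return 0;
  }

The old comparator also sorted by level, btree id and key position so
that each phase (alloc keys, interior nodes, leaves) saw its keys
grouped; with no phases left, journal_seq order is all that matters.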