mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-30 22:26:55 +00:00
bcachefs: Simplify journal replay
With BTREE_ITER_WITH_JOURNAL, there are no longer any restrictions on the order in which we have to replay keys from the journal, and we can also start up journal reclaim right away - and delete a bunch of code. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
parent
8e432d98a5
commit
d8601afca8
8 changed files with 22 additions and 114 deletions
|
@ -902,8 +902,7 @@ static void discard_one_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
|
|||
static bool allocator_thread_running(struct bch_dev *ca)
|
||||
{
|
||||
unsigned state = ca->mi.state == BCH_MEMBER_STATE_rw &&
|
||||
test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags) &&
|
||||
test_bit(BCH_FS_ALLOC_REPLAY_DONE, &ca->fs->flags)
|
||||
test_bit(BCH_FS_ALLOCATOR_RUNNING, &ca->fs->flags)
|
||||
? ALLOCATOR_running
|
||||
: ALLOCATOR_stopped;
|
||||
alloc_thread_set_state(ca, state);
|
||||
|
|
|
@ -510,8 +510,6 @@ enum {
|
|||
BCH_FS_INITIAL_GC_DONE,
|
||||
BCH_FS_INITIAL_GC_UNFIXED,
|
||||
BCH_FS_TOPOLOGY_REPAIR_DONE,
|
||||
BCH_FS_ALLOC_REPLAY_DONE,
|
||||
BCH_FS_BTREE_INTERIOR_REPLAY_DONE,
|
||||
BCH_FS_FSCK_DONE,
|
||||
BCH_FS_STARTED,
|
||||
BCH_FS_RW,
|
||||
|
|
|
@ -16,8 +16,7 @@ static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c)
|
|||
size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
|
||||
size_t max_dirty = 4096 + (nr_keys * 3) / 4;
|
||||
|
||||
return nr_dirty > max_dirty &&
|
||||
test_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
|
||||
return nr_dirty > max_dirty;
|
||||
}
|
||||
|
||||
int bch2_btree_key_cache_journal_flush(struct journal *,
|
||||
|
|
|
@ -45,7 +45,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
|
|||
|
||||
BUG_ON(!b->c.level);
|
||||
|
||||
if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
|
||||
if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
|
||||
return;
|
||||
|
||||
bch2_btree_node_iter_init_from_start(&iter, b);
|
||||
|
@ -1851,9 +1851,6 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
|
|||
{
|
||||
struct async_btree_rewrite *a;
|
||||
|
||||
if (!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags))
|
||||
return;
|
||||
|
||||
if (!percpu_ref_tryget(&c->writes))
|
||||
return;
|
||||
|
||||
|
|
|
@ -206,9 +206,6 @@ static bool btree_insert_key_leaf(struct btree_trans *trans,
|
|||
int old_live_u64s = b->nr.live_u64s;
|
||||
int live_u64s_added, u64s_added;
|
||||
|
||||
EBUG_ON(!insert->level &&
|
||||
!test_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags));
|
||||
|
||||
if (unlikely(!bch2_btree_bset_insert_key(trans, insert->path, b,
|
||||
&insert_l(insert)->iter, insert->k)))
|
||||
return false;
|
||||
|
|
|
@ -489,9 +489,6 @@ static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
|
|||
u64 seq;
|
||||
int err;
|
||||
|
||||
if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
|
||||
return 0;
|
||||
|
||||
lockdep_assert_held(&j->reclaim_lock);
|
||||
|
||||
while (1) {
|
||||
|
@ -689,8 +686,6 @@ static int bch2_journal_reclaim_thread(void *arg)
|
|||
|
||||
set_freezable();
|
||||
|
||||
kthread_wait_freezable(test_bit(JOURNAL_RECLAIM_STARTED, &j->flags));
|
||||
|
||||
j->last_flushed = jiffies;
|
||||
|
||||
while (!ret && !kthread_should_stop()) {
|
||||
|
|
|
@ -148,7 +148,6 @@ enum journal_space_from {
|
|||
enum {
|
||||
JOURNAL_REPLAY_DONE,
|
||||
JOURNAL_STARTED,
|
||||
JOURNAL_RECLAIM_STARTED,
|
||||
JOURNAL_NEED_WRITE,
|
||||
JOURNAL_MAY_GET_UNRESERVED,
|
||||
JOURNAL_MAY_SKIP_FLUSH,
|
||||
|
|
|
@ -474,8 +474,8 @@ static void replay_now_at(struct journal *j, u64 seq)
|
|||
bch2_journal_pin_put(j, j->replay_journal_seq++);
|
||||
}
|
||||
|
||||
static int __bch2_journal_replay_key(struct btree_trans *trans,
|
||||
struct journal_key *k)
|
||||
static int bch2_journal_replay_key(struct btree_trans *trans,
|
||||
struct journal_key *k)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
unsigned iter_flags =
|
||||
|
@ -484,7 +484,7 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
|
|||
int ret;
|
||||
|
||||
if (!k->level && k->btree_id == BTREE_ID_alloc)
|
||||
iter_flags |= BTREE_ITER_CACHED|BTREE_ITER_CACHED_NOFILL;
|
||||
iter_flags |= BTREE_ITER_CACHED;
|
||||
|
||||
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
|
||||
BTREE_MAX_DEPTH, k->level,
|
||||
|
@ -503,29 +503,12 @@ static int __bch2_journal_replay_key(struct btree_trans *trans,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_journal_replay_key(struct bch_fs *c, struct journal_key *k)
|
||||
{
|
||||
unsigned commit_flags =
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_JOURNAL_RESERVED;
|
||||
|
||||
if (!k->allocated)
|
||||
commit_flags |= BTREE_INSERT_JOURNAL_REPLAY;
|
||||
|
||||
return bch2_trans_do(c, NULL, NULL, commit_flags,
|
||||
__bch2_journal_replay_key(&trans, k));
|
||||
}
|
||||
|
||||
static int journal_sort_seq_cmp(const void *_l, const void *_r)
|
||||
{
|
||||
const struct journal_key *l = *((const struct journal_key **)_l);
|
||||
const struct journal_key *r = *((const struct journal_key **)_r);
|
||||
|
||||
return cmp_int(r->level, l->level) ?:
|
||||
cmp_int(l->journal_seq, r->journal_seq) ?:
|
||||
cmp_int(l->btree_id, r->btree_id) ?:
|
||||
bpos_cmp(l->k->k.p, r->k->k.p);
|
||||
return cmp_int(l->journal_seq, r->journal_seq);
|
||||
}
|
||||
|
||||
static int bch2_journal_replay(struct bch_fs *c)
|
||||
|
@ -533,10 +516,7 @@ static int bch2_journal_replay(struct bch_fs *c)
|
|||
struct journal_keys *keys = &c->journal_keys;
|
||||
struct journal_key **keys_sorted, *k;
|
||||
struct journal *j = &c->journal;
|
||||
struct bch_dev *ca;
|
||||
unsigned idx;
|
||||
size_t i;
|
||||
u64 seq;
|
||||
int ret;
|
||||
|
||||
keys_sorted = kmalloc_array(sizeof(*keys_sorted), keys->nr, GFP_KERNEL);
|
||||
|
@ -555,73 +535,25 @@ static int bch2_journal_replay(struct bch_fs *c)
|
|||
replay_now_at(j, keys->journal_seq_base);
|
||||
}
|
||||
|
||||
seq = j->replay_journal_seq;
|
||||
|
||||
/*
|
||||
* First replay updates to the alloc btree - these will only update the
|
||||
* btree key cache:
|
||||
*/
|
||||
for (i = 0; i < keys->nr; i++) {
|
||||
k = keys_sorted[i];
|
||||
|
||||
cond_resched();
|
||||
|
||||
if (!k->level && k->btree_id == BTREE_ID_alloc) {
|
||||
j->replay_journal_seq = keys->journal_seq_base + k->journal_seq;
|
||||
ret = bch2_journal_replay_key(c, k);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
if (!k->allocated)
|
||||
replay_now_at(j, keys->journal_seq_base + k->journal_seq);
|
||||
|
||||
/* Now we can start the allocator threads: */
|
||||
set_bit(BCH_FS_ALLOC_REPLAY_DONE, &c->flags);
|
||||
for_each_member_device(ca, c, idx)
|
||||
bch2_wake_allocator(ca);
|
||||
|
||||
/*
|
||||
* Next replay updates to interior btree nodes:
|
||||
*/
|
||||
for (i = 0; i < keys->nr; i++) {
|
||||
k = keys_sorted[i];
|
||||
|
||||
cond_resched();
|
||||
|
||||
if (k->level) {
|
||||
j->replay_journal_seq = keys->journal_seq_base + k->journal_seq;
|
||||
ret = bch2_journal_replay_key(c, k);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Now that the btree is in a consistent state, we can start journal
|
||||
* reclaim (which will be flushing entries from the btree key cache back
|
||||
* to the btree:
|
||||
*/
|
||||
set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
|
||||
set_bit(JOURNAL_RECLAIM_STARTED, &j->flags);
|
||||
journal_reclaim_kick(j);
|
||||
|
||||
j->replay_journal_seq = seq;
|
||||
|
||||
/*
|
||||
* Now replay leaf node updates:
|
||||
*/
|
||||
for (i = 0; i < keys->nr; i++) {
|
||||
k = keys_sorted[i];
|
||||
|
||||
cond_resched();
|
||||
|
||||
if (k->level || k->btree_id == BTREE_ID_alloc)
|
||||
continue;
|
||||
|
||||
replay_now_at(j, keys->journal_seq_base + k->journal_seq);
|
||||
|
||||
ret = bch2_journal_replay_key(c, k);
|
||||
if (ret)
|
||||
ret = bch2_trans_do(c, NULL, NULL,
|
||||
BTREE_INSERT_LAZY_RW|
|
||||
BTREE_INSERT_NOFAIL|
|
||||
BTREE_INSERT_JOURNAL_RESERVED|
|
||||
(!k->allocated ? BTREE_INSERT_JOURNAL_REPLAY : 0),
|
||||
bch2_journal_replay_key(&trans, k));
|
||||
if (ret) {
|
||||
bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
|
||||
ret, bch2_btree_ids[k->btree_id], k->level);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
replay_now_at(j, j->replay_journal_seq_end);
|
||||
|
@ -629,14 +561,9 @@ static int bch2_journal_replay(struct bch_fs *c)
|
|||
|
||||
bch2_journal_set_replay_done(j);
|
||||
bch2_journal_flush_all_pins(j);
|
||||
kfree(keys_sorted);
|
||||
|
||||
return bch2_journal_error(j);
|
||||
ret = bch2_journal_error(j);
|
||||
err:
|
||||
bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
|
||||
ret, bch2_btree_ids[k->btree_id], k->level);
|
||||
kfree(keys_sorted);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1215,7 +1142,8 @@ int bch2_fs_recovery(struct bch_fs *c)
|
|||
ret = bch2_journal_replay(c);
|
||||
if (ret)
|
||||
goto err;
|
||||
bch_verbose(c, "journal replay done");
|
||||
if (c->opts.verbose || !c->sb.clean)
|
||||
bch_info(c, "journal replay done");
|
||||
|
||||
if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
|
||||
!c->opts.nochanges) {
|
||||
|
@ -1385,10 +1313,6 @@ int bch2_fs_initialize(struct bch_fs *c)
|
|||
for (i = 0; i < BTREE_ID_NR; i++)
|
||||
bch2_btree_root_alloc(c, i);
|
||||
|
||||
set_bit(BCH_FS_ALLOC_REPLAY_DONE, &c->flags);
|
||||
set_bit(BCH_FS_BTREE_INTERIOR_REPLAY_DONE, &c->flags);
|
||||
set_bit(JOURNAL_RECLAIM_STARTED, &c->journal.flags);
|
||||
|
||||
err = "unable to allocate journal buckets";
|
||||
for_each_online_member(ca, c, i) {
|
||||
ret = bch2_dev_journal_alloc(ca);
|
||||
|
|
Loading…
Reference in a new issue