bcachefs: New locking functions

In the future, with the new deadlock cycle detector, we won't be using
bare six_lock_* anymore: lock wait entries will all be embedded in
btree_trans, and we will need a btree_trans context whenever locking a
btree node.

This patch plumbs a btree_trans to the few places that need it, and adds
two new locking functions:
 - btree_node_lock_nopath, which may fail, returning a transaction
   restart, and
 - btree_node_lock_nopath_nofail, to be used in places where we know we
   cannot deadlock (i.e. because we're holding no other locks).

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
Kent Overstreet 2022-08-21 14:29:43 -04:00 committed by Kent Overstreet
parent 546180874a
commit ca7d8fcabf
10 changed files with 182 additions and 96 deletions

View file

@ -959,12 +959,13 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
return b;
}
struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
const struct bkey_i *k,
enum btree_id btree_id,
unsigned level,
bool nofill)
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
struct bset_tree *t;
@ -998,9 +999,14 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
goto out;
} else {
lock_node:
ret = six_lock_read(&b->c.lock, lock_node_check_fn, (void *) k);
if (ret)
goto retry;
ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read);
if (unlikely(ret)) {
if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
goto retry;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ERR_PTR(ret);
BUG();
}
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
b->c.btree_id != btree_id ||
@ -1062,8 +1068,9 @@ int bch2_btree_node_prefetch(struct bch_fs *c,
return PTR_ERR_OR_ZERO(b);
}
void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
{
struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
@ -1079,8 +1086,8 @@ void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
__bch2_btree_node_wait_on_read(b);
__bch2_btree_node_wait_on_write(b);
six_lock_intent(&b->c.lock, NULL, NULL);
six_lock_write(&b->c.lock, NULL, NULL);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
if (btree_node_dirty(b)) {
__bch2_btree_node_write(c, b, 0);

View file

@ -26,13 +26,13 @@ struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_path *,
const struct bkey_i *, unsigned,
enum six_lock_type, unsigned long);
struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
struct btree *bch2_btree_node_get_noiter(struct btree_trans *, const struct bkey_i *,
enum btree_id, unsigned, bool);
int bch2_btree_node_prefetch(struct bch_fs *, struct btree_trans *, struct btree_path *,
const struct bkey_i *, enum btree_id, unsigned);
void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *);
void bch2_btree_node_evict(struct btree_trans *, const struct bkey_i *);
void bch2_fs_btree_cache_exit(struct bch_fs *);
int bch2_fs_btree_cache_init(struct bch_fs *);

View file

@ -165,10 +165,11 @@ static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
}
}
static void bch2_btree_node_update_key_early(struct bch_fs *c,
static void bch2_btree_node_update_key_early(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_i *new)
{
struct bch_fs *c = trans->c;
struct btree *b;
struct bkey_buf tmp;
int ret;
@ -176,7 +177,7 @@ static void bch2_btree_node_update_key_early(struct bch_fs *c,
bch2_bkey_buf_init(&tmp);
bch2_bkey_buf_reassemble(&tmp, c, old);
b = bch2_btree_node_get_noiter(c, tmp.k, btree, level, true);
b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true);
if (!IS_ERR_OR_NULL(b)) {
mutex_lock(&c->btree_cache.lock);
@ -352,8 +353,9 @@ static int btree_repair_node_end(struct bch_fs *c, struct btree *b,
return ret;
}
static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
{
struct bch_fs *c = trans->c;
struct btree_and_journal_iter iter;
struct bkey_s_c k;
struct bkey_buf prev_k, cur_k;
@ -378,7 +380,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
bch2_btree_and_journal_iter_advance(&iter);
bch2_bkey_buf_reassemble(&cur_k, c, k);
cur = bch2_btree_node_get_noiter(c, cur_k.k,
cur = bch2_btree_node_get_noiter(trans, cur_k.k,
b->c.btree_id, b->c.level - 1,
false);
ret = PTR_ERR_OR_ZERO(cur);
@ -392,7 +394,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
bch2_btree_ids[b->c.btree_id],
b->c.level - 1,
buf.buf)) {
bch2_btree_node_evict(c, cur_k.k);
bch2_btree_node_evict(trans, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
cur = NULL;
@ -411,7 +413,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
if (ret == DROP_THIS_NODE) {
six_unlock_read(&cur->c.lock);
bch2_btree_node_evict(c, cur_k.k);
bch2_btree_node_evict(trans, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
cur = NULL;
@ -425,7 +427,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
prev = NULL;
if (ret == DROP_PREV_NODE) {
bch2_btree_node_evict(c, prev_k.k);
bch2_btree_node_evict(trans, prev_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, prev_k.k->k.p);
if (ret)
@ -465,7 +467,7 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
bch2_bkey_buf_reassemble(&cur_k, c, k);
bch2_btree_and_journal_iter_advance(&iter);
cur = bch2_btree_node_get_noiter(c, cur_k.k,
cur = bch2_btree_node_get_noiter(trans, cur_k.k,
b->c.btree_id, b->c.level - 1,
false);
ret = PTR_ERR_OR_ZERO(cur);
@ -476,12 +478,12 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
goto err;
}
ret = bch2_btree_repair_topology_recurse(c, cur);
ret = bch2_btree_repair_topology_recurse(trans, cur);
six_unlock_read(&cur->c.lock);
cur = NULL;
if (ret == DROP_THIS_NODE) {
bch2_btree_node_evict(c, cur_k.k);
bch2_btree_node_evict(trans, cur_k.k);
ret = bch2_journal_key_delete(c, b->c.btree_id,
b->c.level, cur_k.k->k.p);
dropped_children = true;
@ -522,17 +524,20 @@ static int bch2_btree_repair_topology_recurse(struct bch_fs *c, struct btree *b)
static int bch2_repair_topology(struct bch_fs *c)
{
struct btree_trans trans;
struct btree *b;
unsigned i;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
for (i = 0; i < BTREE_ID_NR && !ret; i++) {
b = c->btree_roots[i].b;
if (btree_node_fake(b))
continue;
six_lock_read(&b->c.lock, NULL, NULL);
ret = bch2_btree_repair_topology_recurse(c, b);
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
ret = bch2_btree_repair_topology_recurse(&trans, b);
six_unlock_read(&b->c.lock);
if (ret == DROP_THIS_NODE) {
@ -541,13 +546,16 @@ static int bch2_repair_topology(struct bch_fs *c)
}
}
bch2_trans_exit(&trans);
return ret;
}
static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id,
unsigned level, bool is_root,
struct bkey_s_c *k)
{
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(*k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p = { 0 };
@ -747,7 +755,7 @@ static int bch2_check_fix_ptrs(struct bch_fs *c, enum btree_id btree_id,
}
if (level)
bch2_btree_node_update_key_early(c, btree_id, level - 1, *k, new);
bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
if (c->opts.verbose) {
printbuf_reset(&buf);
@ -788,7 +796,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
BUG_ON(bch2_journal_seq_verify &&
k->k->version.lo > atomic64_read(&c->journal.seq));
ret = bch2_check_fix_ptrs(c, btree_id, level, is_root, k);
ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
if (ret)
goto err;
@ -941,7 +949,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
bch2_bkey_buf_reassemble(&cur, c, k);
bch2_btree_and_journal_iter_advance(&iter);
child = bch2_btree_node_get_noiter(c, cur.k,
child = bch2_btree_node_get_noiter(trans, cur.k,
b->c.btree_id, b->c.level - 1,
false);
ret = PTR_ERR_OR_ZERO(child);

View file

@ -1652,9 +1652,15 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
{
six_lock_read(&b->c.lock, NULL, NULL);
struct btree_trans trans;
bch2_trans_init(&trans, c, 0, 0);
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
__btree_node_write_done(c, b);
six_unlock_read(&b->c.lock);
bch2_trans_exit(&trans);
}
static void btree_node_write_work(struct work_struct *work)

View file

@ -95,25 +95,14 @@ static void bkey_cached_free(struct btree_key_cache *bc,
six_unlock_intent(&ck->c.lock);
}
static void bkey_cached_free_fast(struct btree_key_cache *bc,
struct bkey_cached *ck)
static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
struct bkey_cached *ck)
{
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
struct btree_key_cache_freelist *f;
bool freed = false;
BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
ck->btree_trans_barrier_seq =
start_poll_synchronize_srcu(&c->btree_trans_barrier);
list_del_init(&ck->list);
atomic_long_inc(&bc->nr_freed);
kfree(ck->k);
ck->k = NULL;
ck->u64s = 0;
preempt_disable();
f = this_cpu_ptr(bc->pcpu_freed);
@ -138,13 +127,32 @@ static void bkey_cached_free_fast(struct btree_key_cache *bc,
list_move_tail(&ck->list, &bc->freed);
mutex_unlock(&bc->lock);
}
}
/*
 * Tear down the contents of @ck and hand it to
 * bkey_cached_move_to_freelist(), then drop the write and intent locks on it.
 *
 * NOTE(review): the unlocks at the end imply the caller holds both the
 * intent and write locks on @ck on entry - confirm against callers.
 */
static void bkey_cached_free_fast(struct btree_key_cache *bc,
struct bkey_cached *ck)
{
/* @bc is embedded in struct bch_fs as the btree_key_cache member */
struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
/* Save the value returned for the btree_trans_barrier SRCU in @ck */
ck->btree_trans_barrier_seq =
start_poll_synchronize_srcu(&c->btree_trans_barrier);
/* Unlink from whatever list @ck is on and count it as freed */
list_del_init(&ck->list);
atomic_long_inc(&bc->nr_freed);
/* Free the key buffer and reset the fields that described it */
kfree(ck->k);
ck->k = NULL;
ck->u64s = 0;
bkey_cached_move_to_freelist(bc, ck);
/* Locks are released only after @ck has been moved to a freelist */
six_unlock_write(&ck->c.lock);
six_unlock_intent(&ck->c.lock);
}
static struct bkey_cached *
bkey_cached_alloc(struct btree_key_cache *c)
bkey_cached_alloc(struct btree_trans *trans,
struct btree_key_cache *c)
{
struct bkey_cached *ck = NULL;
struct btree_key_cache_freelist *f;
@ -173,8 +181,21 @@ bkey_cached_alloc(struct btree_key_cache *c)
}
if (ck) {
six_lock_intent(&ck->c.lock, NULL, NULL);
six_lock_write(&ck->c.lock, NULL, NULL);
int ret;
ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_intent);
if (unlikely(ret)) {
bkey_cached_move_to_freelist(c, ck);
return ERR_PTR(ret);
}
ret = btree_node_lock_nopath(trans, &ck->c, SIX_LOCK_write);
if (unlikely(ret)) {
six_unlock_intent(&ck->c.lock);
bkey_cached_move_to_freelist(c, ck);
return ERR_PTR(ret);
}
return ck;
}
@ -216,15 +237,18 @@ bkey_cached_reuse(struct btree_key_cache *c)
}
static struct bkey_cached *
btree_key_cache_create(struct bch_fs *c,
btree_key_cache_create(struct btree_trans *trans,
enum btree_id btree_id,
struct bpos pos)
{
struct bch_fs *c = trans->c;
struct btree_key_cache *bc = &c->btree_key_cache;
struct bkey_cached *ck;
bool was_new = true;
ck = bkey_cached_alloc(bc);
ck = bkey_cached_alloc(trans, bc);
if (unlikely(IS_ERR(ck)))
return ck;
if (unlikely(!ck)) {
ck = bkey_cached_reuse(bc);
@ -370,7 +394,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path
retry:
ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
if (!ck) {
ck = btree_key_cache_create(c, path->btree_id, path->pos);
ck = btree_key_cache_create(trans, path->btree_id, path->pos);
ret = PTR_ERR_OR_ZERO(ck);
if (ret)
goto err;
@ -519,10 +543,15 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
evict:
BUG_ON(!btree_node_intent_locked(c_iter.path, 0));
mark_btree_node_unlocked(c_iter.path, 0);
c_iter.path->l[0].b = NULL;
/*
* XXX: holding a lock that is not marked in btree_trans, not
* ideal:
*/
six_lock_increment(&ck->c.lock, SIX_LOCK_intent);
bch2_trans_unlock(trans);
six_lock_write(&ck->c.lock, NULL, NULL);
/* Will not fail because we are holding no other locks: */
btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_write);
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
@ -546,11 +575,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
struct bkey_cached *ck =
container_of(pin, struct bkey_cached, journal);
struct bkey_cached_key key;
struct btree_trans trans;
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
int ret = 0;
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
bch2_trans_init(&trans, c, 0, 0);
six_lock_read(&ck->c.lock, NULL, NULL);
btree_node_lock_nopath_nofail(&trans, &ck->c, SIX_LOCK_read);
key = ck->key;
if (ck->journal.seq != seq ||
@ -567,12 +598,13 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
}
six_unlock_read(&ck->c.lock);
ret = bch2_trans_do(c, NULL, NULL, 0,
ret = commit_do(&trans, NULL, NULL, 0,
btree_key_cache_flush_pos(&trans, key, seq,
BTREE_INSERT_JOURNAL_RECLAIM, false));
unlock:
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
bch2_trans_exit(&trans);
return ret;
}

View file

@ -61,7 +61,7 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
* locked:
*/
six_lock_readers_add(&b->c.lock, -readers);
six_lock_write(&b->c.lock, NULL, NULL);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
six_lock_readers_add(&b->c.lock, readers);
}

View file

@ -185,6 +185,24 @@ void bch2_btree_node_unlock_write(struct btree_trans *,
/* lock: */
/*
 * Take a lock of the given @type on @b without going through a btree_path.
 *
 * As written here this always returns 0: it calls six_lock_type() with NULL
 * for its last two arguments and does not look at the result, and @trans is
 * accepted but not used. The return value is nevertheless __must_check; the
 * accompanying commit description says this function may fail with a
 * transaction restart, so callers are expected to handle a nonzero return.
 */
static inline int __must_check
btree_node_lock_nopath(struct btree_trans *trans,
struct btree_bkey_cached_common *b,
enum six_lock_type type)
{
/* Return value of six_lock_type() is intentionally not propagated here */
six_lock_type(&b->lock, type, NULL, NULL);
return 0;
}
/*
 * Variant of btree_node_lock_nopath() for callers that have no way to handle
 * failure: any nonzero return trips BUG_ON().
 *
 * Per the accompanying commit description, this is meant for places known
 * not to deadlock (i.e. because no other locks are held).
 */
static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
struct btree_bkey_cached_common *b,
enum six_lock_type type)
{
/* Keep the call outside BUG_ON() so the lock is taken as its own statement */
int ret = btree_node_lock_nopath(trans, b, type);
BUG_ON(ret);
}
static inline int btree_node_lock_type(struct btree_trans *trans,
struct btree_path *path,
struct btree_bkey_cached_common *b,

View file

@ -169,7 +169,7 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
BUG_ON(path->l[b->c.level].b == b &&
path->l[b->c.level].lock_seq == b->c.lock.state.seq);
six_lock_write(&b->c.lock, NULL, NULL);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
bch2_btree_node_hash_remove(&c->btree_cache, b);
__btree_node_free(c, b);
@ -259,7 +259,9 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
return b;
}
static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned level)
static struct btree *bch2_btree_node_alloc(struct btree_update *as,
struct btree_trans *trans,
unsigned level)
{
struct bch_fs *c = as->c;
struct btree *b;
@ -271,8 +273,8 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
b = p->b[--p->nr];
six_lock_intent(&b->c.lock, NULL, NULL);
six_lock_write(&b->c.lock, NULL, NULL);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
set_btree_node_accessed(b);
set_btree_node_dirty_acct(c, b);
@ -323,12 +325,13 @@ static void btree_set_max(struct btree *b, struct bpos pos)
}
struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
struct btree_trans *trans,
struct btree *b,
struct bkey_format format)
{
struct btree *n;
n = bch2_btree_node_alloc(as, b->c.level);
n = bch2_btree_node_alloc(as, trans, b->c.level);
SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
@ -347,6 +350,7 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
}
static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
struct btree_trans *trans,
struct btree *b)
{
struct bkey_format new_f = bch2_btree_calc_format(b);
@ -358,12 +362,13 @@ static struct btree *bch2_btree_node_alloc_replacement(struct btree_update *as,
if (!bch2_btree_node_format_fits(as->c, b, &new_f))
new_f = b->format;
return __bch2_btree_node_alloc_replacement(as, b, new_f);
return __bch2_btree_node_alloc_replacement(as, trans, b, new_f);
}
static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
static struct btree *__btree_root_alloc(struct btree_update *as,
struct btree_trans *trans, unsigned level)
{
struct btree *b = bch2_btree_node_alloc(as, level);
struct btree *b = bch2_btree_node_alloc(as, trans, level);
btree_set_min(b, POS_MIN);
btree_set_max(b, SPOS_MAX);
@ -378,7 +383,7 @@ static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
return b;
}
static void bch2_btree_reserve_put(struct btree_update *as)
static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *trans)
{
struct bch_fs *c = as->c;
struct prealloc_nodes *p;
@ -405,8 +410,8 @@ static void bch2_btree_reserve_put(struct btree_update *as)
mutex_unlock(&c->btree_reserve_cache_lock);
six_lock_intent(&b->c.lock, NULL, NULL);
six_lock_write(&b->c.lock, NULL, NULL);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
__btree_node_free(c, b);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
@ -460,7 +465,7 @@ static int bch2_btree_reserve_get(struct btree_trans *trans,
/* Asynchronous interior node update machinery */
static void bch2_btree_update_free(struct btree_update *as)
static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *trans)
{
struct bch_fs *c = as->c;
@ -473,7 +478,7 @@ static void bch2_btree_update_free(struct btree_update *as)
bch2_journal_pin_drop(&c->journal, &as->journal);
bch2_journal_pin_flush(&c->journal, &as->journal);
bch2_disk_reservation_put(c, &as->disk_res);
bch2_btree_reserve_put(as);
bch2_btree_reserve_put(as, trans);
bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total],
as->start_time);
@ -551,12 +556,13 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
static void btree_update_nodes_written(struct btree_update *as)
{
struct bch_fs *c = as->c;
struct btree *b = as->b;
struct btree *b;
struct btree_trans trans;
u64 journal_seq = 0;
unsigned i;
int ret;
bch2_trans_init(&trans, c, 0, 512);
/*
* If we're already in an error state, it might be because a btree node
* was never written, and we might be trying to free that same btree
@ -573,15 +579,16 @@ static void btree_update_nodes_written(struct btree_update *as)
* on disk:
*/
for (i = 0; i < as->nr_old_nodes; i++) {
struct btree *old = as->old_nodes[i];
__le64 seq;
six_lock_read(&old->c.lock, NULL, NULL);
seq = old->data ? old->data->keys.seq : 0;
six_unlock_read(&old->c.lock);
b = as->old_nodes[i];
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
seq = b->data ? b->data->keys.seq : 0;
six_unlock_read(&b->c.lock);
if (seq == as->old_nodes_seq[i])
wait_on_bit_io(&old->flags, BTREE_NODE_write_in_flight_inner,
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
TASK_UNINTERRUPTIBLE);
}
@ -598,19 +605,19 @@ static void btree_update_nodes_written(struct btree_update *as)
* journal reclaim does btree updates when flushing bkey_cached entries,
* which may require allocations as well.
*/
bch2_trans_init(&trans, c, 0, 512);
ret = commit_do(&trans, &as->disk_res, &journal_seq,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_JOURNAL_RECLAIM|
JOURNAL_WATERMARK_reserved,
btree_update_nodes_written_trans(&trans, as));
bch2_trans_exit(&trans);
BTREE_INSERT_NOFAIL|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_JOURNAL_RECLAIM|
JOURNAL_WATERMARK_reserved,
btree_update_nodes_written_trans(&trans, as));
bch2_trans_unlock(&trans);
bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
"error %i in btree_update_nodes_written()", ret);
err:
if (b) {
if (as->b) {
b = as->b;
/*
* @b is the node we did the final insert into:
*
@ -623,8 +630,8 @@ static void btree_update_nodes_written(struct btree_update *as)
* we're in journal error state:
*/
six_lock_intent(&b->c.lock, NULL, NULL);
six_lock_write(&b->c.lock, NULL, NULL);
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_write);
mutex_lock(&c->btree_interior_update_lock);
list_del(&as->write_blocked_list);
@ -681,7 +688,7 @@ static void btree_update_nodes_written(struct btree_update *as)
for (i = 0; i < as->nr_new_nodes; i++) {
b = as->new_nodes[i];
six_lock_read(&b->c.lock, NULL, NULL);
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
}
@ -689,7 +696,8 @@ static void btree_update_nodes_written(struct btree_update *as)
for (i = 0; i < as->nr_open_buckets; i++)
bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]);
bch2_btree_update_free(as);
bch2_btree_update_free(as, &trans);
bch2_trans_exit(&trans);
}
static void btree_interior_update_work(struct work_struct *work)
@ -936,7 +944,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as,
as->nr_old_nodes++;
}
static void bch2_btree_update_done(struct btree_update *as)
static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *trans)
{
struct bch_fs *c = as->c;
u64 start_time = as->start_time;
@ -947,7 +955,7 @@ static void bch2_btree_update_done(struct btree_update *as)
up_read(&as->c->gc_lock);
as->took_gc_lock = false;
bch2_btree_reserve_put(as);
bch2_btree_reserve_put(as, trans);
continue_at(&as->cl, btree_update_set_nodes_written,
as->c->btree_interior_update_worker);
@ -1102,7 +1110,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
bch2_trans_verify_not_restarted(trans, restart_count);
return as;
err:
bch2_btree_update_free(as);
bch2_btree_update_free(as, trans);
return ERR_PTR(ret);
}
@ -1254,6 +1262,7 @@ __bch2_btree_insert_keys_interior(struct btree_update *as,
* node)
*/
static struct btree *__btree_split_node(struct btree_update *as,
struct btree_trans *trans,
struct btree *n1)
{
struct bkey_format_state s;
@ -1263,7 +1272,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL;
struct bpos n1_pos;
n2 = bch2_btree_node_alloc(as, n1->c.level);
n2 = bch2_btree_node_alloc(as, trans, n1->c.level);
n2->data->max_key = n1->data->max_key;
n2->data->format = n1->format;
@ -1427,7 +1436,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
bch2_btree_interior_update_will_free_node(as, b);
n1 = bch2_btree_node_alloc_replacement(as, b);
n1 = bch2_btree_node_alloc_replacement(as, trans, b);
if (keys)
btree_split_insert_keys(as, trans, path, n1, keys);
@ -1435,7 +1444,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
if (bset_u64s(&n1->set[0]) > BTREE_SPLIT_THRESHOLD(c)) {
trace_and_count(c, btree_node_split, c, b);
n2 = __btree_split_node(as, n1);
n2 = __btree_split_node(as, trans, n1);
bch2_btree_build_aux_trees(n2);
bch2_btree_build_aux_trees(n1);
@ -1457,7 +1466,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
if (!parent) {
/* Depth increases, make a new root */
n3 = __btree_root_alloc(as, b->c.level + 1);
n3 = __btree_root_alloc(as, trans, b->c.level + 1);
n3->sib_u64s[0] = U16_MAX;
n3->sib_u64s[1] = U16_MAX;
@ -1622,7 +1631,7 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
return PTR_ERR(as);
btree_split(as, trans, path, b, NULL, flags);
bch2_btree_update_done(as);
bch2_btree_update_done(as, trans);
for (l = path->level + 1; btree_path_node(path, l) && !ret; l++)
ret = bch2_foreground_maybe_merge(trans, path, l, flags);
@ -1741,7 +1750,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_btree_interior_update_will_free_node(as, b);
bch2_btree_interior_update_will_free_node(as, m);
n = bch2_btree_node_alloc(as, b->c.level);
n = bch2_btree_node_alloc(as, trans, b->c.level);
SET_BTREE_NODE_SEQ(n->data,
max(BTREE_NODE_SEQ(b->data),
@ -1788,7 +1797,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
six_unlock_intent(&n->c.lock);
bch2_btree_update_done(as);
bch2_btree_update_done(as, trans);
bch2_time_stats_update(&c->times[BCH_TIME_btree_node_merge], start_time);
out:
@ -1822,7 +1831,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
bch2_btree_interior_update_will_free_node(as, b);
n = bch2_btree_node_alloc_replacement(as, b);
n = bch2_btree_node_alloc_replacement(as, trans, b);
bch2_btree_update_add_new_node(as, n);
bch2_btree_build_aux_trees(n);
@ -1847,7 +1856,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
bch2_btree_node_free_inmem(trans, b);
six_unlock_intent(&n->c.lock);
bch2_btree_update_done(as);
bch2_btree_update_done(as, trans);
out:
bch2_btree_path_downgrade(trans, iter->path);
return ret;

View file

@ -117,6 +117,7 @@ struct btree_update {
};
struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
struct btree_trans *,
struct btree *,
struct bkey_format);

View file

@ -169,10 +169,13 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct btree_write *w = container_of(pin, struct btree_write, journal);
struct btree *b = container_of(w, struct btree, writes[i]);
struct btree_trans trans;
unsigned long old, new, v;
unsigned idx = w - b->writes;
six_lock_read(&b->c.lock, NULL, NULL);
bch2_trans_init(&trans, c, 0, 0);
btree_node_lock_nopath_nofail(&trans, &b->c, SIX_LOCK_read);
v = READ_ONCE(b->flags);
do {
@ -188,6 +191,8 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
btree_node_write_if_need(c, b, SIX_LOCK_read);
six_unlock_read(&b->c.lock);
bch2_trans_exit(&trans);
return 0;
}