diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index ee3c1f40b500..40281a9acbbc 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -596,12 +596,13 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
 	struct btree_cache *bc = &c->btree_cache;
 	struct btree *b;
 
+	BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
 	/*
 	 * Parent node must be locked, else we could read in a btree node that's
 	 * been freed:
 	 */
-	BUG_ON(!btree_node_locked(iter, level + 1));
-	BUG_ON(level >= BTREE_MAX_DEPTH);
+	if (!bch2_btree_node_relock(iter, level + 1))
+		return ERR_PTR(-EINTR);
 
 	b = bch2_btree_node_mem_alloc(c);
 	if (IS_ERR(b))
@@ -624,13 +625,9 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
 	}
 
 	/*
-	 * If the btree node wasn't cached, we can't drop our lock on
-	 * the parent until after it's added to the cache - because
-	 * otherwise we could race with a btree_split() freeing the node
-	 * we're trying to lock.
+	 * Unlock before doing IO:
 	 *
-	 * But the deadlock described below doesn't exist in this case,
-	 * so it's safe to not drop the parent lock until here:
+	 * XXX: ideally should be dropping all btree node locks here
 	 */
 	if (btree_node_read_locked(iter, level + 1))
 		btree_node_unlock(iter, level + 1);
@@ -667,16 +664,11 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
 	struct btree *b;
 	struct bset_tree *t;
 
-	/*
-	 * XXX: locking optimization
-	 *
-	 * we can make the locking looser here - caller can drop lock on parent
-	 * node before locking child node (and potentially blocking): we just
-	 * have to have bch2_btree_node_fill() call relock on the parent and
-	 * return -EINTR if that fails
-	 */
-	EBUG_ON(!btree_node_locked(iter, level + 1));
 	EBUG_ON(level >= BTREE_MAX_DEPTH);
+
+	b = btree_node_mem_ptr(k);
+	if (b)
+		goto lock_node;
 retry:
 	b = btree_cache_find(bc, k);
 	if (unlikely(!b)) {
@@ -694,6 +686,7 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
 		if (IS_ERR(b))
 			return b;
 	} else {
+lock_node:
 		/*
 		 * There's a potential deadlock with splits and insertions into
 		 * interior nodes we have to avoid:
@@ -740,6 +733,7 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
 		}
 	}
 
+	/* XXX: waiting on IO with btree locks held: */
 	wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight,
 		       TASK_UNINTERRUPTIBLE);
 
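The bch2_btree_node_fill() change above loosens the old locking rule: instead of asserting that the parent node is locked (BUG_ON), it now tries to retake the lock with bch2_btree_node_relock() and returns -EINTR when that fails, so a caller may drop the parent lock before blocking and restart the traversal if it loses the relock race. A minimal standalone sketch of that relock-or-restart contract follows; try_relock_parent() and node_fill() are hypothetical stand-ins, not bcachefs functions.

#include <stdio.h>
#include <errno.h>
#include <stdbool.h>

/* Hypothetical stand-in for bch2_btree_node_relock(): fails on the
 * first call, then succeeds, to exercise the restart path. */
static bool try_relock_parent(void)
{
	static int calls;

	return calls++ > 0;
}

/* Mirrors the new contract: try to retake the parent lock and hand
 * -EINTR back to the caller instead of asserting it is held. */
static int node_fill(void)
{
	if (!try_relock_parent())
		return -EINTR;

	puts("parent relocked, filling node");
	return 0;
}

int main(void)
{
	int ret;

	/* Caller-side restart loop: -EINTR means "unwind and retry". */
	do {
		ret = node_fill();
		if (ret == -EINTR)
			puts("relock failed, restarting traversal");
	} while (ret == -EINTR);

	return ret;
}
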
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 6e7edcaf6675..5d85987457bf 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -47,6 +47,13 @@ static inline u64 btree_ptr_hash_val(const struct bkey_i *k)
 	}
 }
 
+static inline struct btree *btree_node_mem_ptr(const struct bkey_i *k)
+{
+	return k->k.type == KEY_TYPE_btree_ptr_v2
+		? (void *)(unsigned long)bkey_i_to_btree_ptr_v2_c(k)->v.mem_ptr
+		: NULL;
+}
+
 /* is btree node in hash table? */
 static inline bool btree_node_hashed(struct btree *b)
 {
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 5fa31698ed67..00d796cb418b 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1647,6 +1647,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 
 	b->written += sectors_to_write;
 
+	/* XXX: submitting IO with btree locks held: */
 	bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_BTREE, &k.key);
 	return;
 err:
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 37c60842c670..3817dcb5fa1f 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -912,6 +912,27 @@ static void btree_iter_prefetch(struct btree_iter *iter)
 		btree_node_unlock(iter, iter->level);
 }
 
+static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
+					    unsigned plevel, struct btree *b)
+{
+	struct btree_iter_level *l = &iter->l[plevel];
+	bool locked = btree_node_locked(iter, plevel);
+	struct bkey_packed *k;
+	struct bch_btree_ptr_v2 *bp;
+
+	if (!bch2_btree_node_relock(iter, plevel))
+		return;
+
+	k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
+	BUG_ON(k->type != KEY_TYPE_btree_ptr_v2);
+
+	bp = (void *) bkeyp_val(&l->b->format, k);
+	bp->mem_ptr = (unsigned long)b;
+
+	if (!locked)
+		btree_node_unlock(iter, plevel);
+}
+
 static __always_inline int btree_iter_down(struct btree_iter *iter)
 {
 	struct bch_fs *c = iter->trans->c;
@@ -933,6 +954,10 @@ static __always_inline int btree_iter_down(struct btree_iter *iter)
 	mark_btree_node_locked(iter, level, lock_type);
 	btree_iter_node_set(iter, b);
 
+	if (tmp.k.k.type == KEY_TYPE_btree_ptr_v2 &&
+	    unlikely(b != btree_node_mem_ptr(&tmp.k)))
+		btree_node_mem_ptr_set(iter, level + 1, b);
+
 	if (iter->flags & BTREE_ITER_PREFETCH)
 		btree_iter_prefetch(iter);
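
Taken together, the hunks implement a fast path for btree node lookup: btree_ptr_v2 keys carry a mem_ptr field, btree_node_mem_ptr() reads it, bch2_btree_node_get() uses it to jump straight to lock_node and skip the hash table, and btree_node_mem_ptr_set() writes the hint back into the parent's key (under the parent lock, via bch2_btree_node_relock()) whenever btree_iter_down() finds it stale. The sketch below illustrates only the caching idea, with hypothetical types and no locking; in bcachefs the hint is read and written with the parent node locked, which is what keeps it from dangling when nodes are freed.

#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-ins: a cached node on a hash chain, and a pointer
 * key that embeds an in-memory hint next to its on-disk payload. */
struct node {
	uint64_t id;
	struct node *next;
};

struct ptr_key {
	uint64_t id;		/* stands in for the on-disk pointer */
	uintptr_t mem_ptr;	/* in-memory hint; 0 means "not cached" */
};

#define HASH_BITS 4
static struct node *hash_table[1 << HASH_BITS];

static struct node *hash_lookup(uint64_t id)
{
	struct node *n = hash_table[id & ((1 << HASH_BITS) - 1)];

	while (n && n->id != id)
		n = n->next;
	return n;
}

static void hash_insert(struct node *n)
{
	struct node **head = &hash_table[n->id & ((1 << HASH_BITS) - 1)];

	n->next = *head;
	*head = n;
}

/* Fast path first: trust the hint if set, else fall back to the hash
 * table and refresh the hint for the next lookup. */
static struct node *node_get(struct ptr_key *k)
{
	struct node *n = (struct node *) k->mem_ptr;

	if (n)
		return n;			/* hash lookup skipped */

	n = hash_lookup(k->id);
	if (n)
		k->mem_ptr = (uintptr_t) n;
	return n;
}

int main(void)
{
	struct node n = { .id = 42 };
	struct ptr_key k = { .id = 42 };

	hash_insert(&n);
	printf("first lookup:  %p\n", (void *) node_get(&k));	/* via hash */
	printf("second lookup: %p\n", (void *) node_get(&k));	/* via hint */
	return 0;
}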