bcachefs: Fix race leading to btree node write getting stuck

Checking btree_node_may_write() isn't atomic with the other btree flags,
dirty and need_write in particular. There was a rare race where we'd
unblock a node from writing while __btree_node_flush() was setting
need_write, and no thread would notice that the node was now both able
to write and needed to be written.

Fix this by adding btree node flags for will_make_reachable and
write_blocked that can be checked in the cmpxchg loop in
__bch2_btree_node_write.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
This commit is contained in:
Kent Overstreet 2022-02-27 09:56:33 -05:00 committed by Kent Overstreet
parent 6f5f747c31
commit bf3efff5e4
5 changed files with 19 additions and 13 deletions

View file

@ -223,10 +223,9 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
goto wait_on_io;
}
if (btree_node_noevict(b))
goto out_unlock;
if (!btree_node_may_write(b))
if (btree_node_noevict(b) ||
btree_node_write_blocked(b) ||
btree_node_will_make_reachable(b))
goto out_unlock;
if (btree_node_dirty(b)) {

View file

@ -1606,7 +1606,8 @@ static void __btree_node_write_done(struct bch_fs *c, struct btree *b)
if ((old & (1U << BTREE_NODE_dirty)) &&
(old & (1U << BTREE_NODE_need_write)) &&
!(old & (1U << BTREE_NODE_never_write)) &&
btree_node_may_write(b)) {
!(old & (1U << BTREE_NODE_write_blocked)) &&
!(old & (1U << BTREE_NODE_will_make_reachable))) {
new &= ~(1U << BTREE_NODE_dirty);
new &= ~(1U << BTREE_NODE_need_write);
new |= (1U << BTREE_NODE_write_in_flight);
@ -1778,10 +1779,13 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
!(old & (1 << BTREE_NODE_need_write)))
return;
if (!btree_node_may_write(b))
if (old &
((1 << BTREE_NODE_never_write)|
(1 << BTREE_NODE_write_blocked)))
return;
if (old & (1 << BTREE_NODE_never_write))
if (b->written &&
(old & (1 << BTREE_NODE_will_make_reachable)))
return;
if (old & (1 << BTREE_NODE_write_in_flight))

View file

@ -62,12 +62,6 @@ void __bch2_btree_node_wait_on_write(struct btree *);
void bch2_btree_node_wait_on_read(struct btree *);
void bch2_btree_node_wait_on_write(struct btree *);
static inline bool btree_node_may_write(struct btree *b)
{
return list_empty_careful(&b->write_blocked) &&
(!b->written || !b->will_make_reachable);
}
enum compact_mode {
COMPACT_LAZY,
COMPACT_ALL,

View file

@ -434,6 +434,8 @@ struct btree_trans {
x(read_error) \
x(dirty) \
x(need_write) \
x(write_blocked) \
x(will_make_reachable) \
x(noevict) \
x(write_idx) \
x(accessed) \

View file

@ -606,6 +606,8 @@ static void btree_update_nodes_written(struct btree_update *as)
mutex_lock(&c->btree_interior_update_lock);
list_del(&as->write_blocked_list);
if (list_empty(&b->write_blocked))
clear_btree_node_write_blocked(b);
/*
* Node might have been freed, recheck under
@ -650,6 +652,7 @@ static void btree_update_nodes_written(struct btree_update *as)
BUG_ON(b->will_make_reachable != (unsigned long) as);
b->will_make_reachable = 0;
clear_btree_node_will_make_reachable(b);
}
mutex_unlock(&c->btree_interior_update_lock);
@ -716,6 +719,8 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
as->mode = BTREE_INTERIOR_UPDATING_NODE;
as->b = b;
set_btree_node_write_blocked(b);
list_add(&as->write_blocked_list, &b->write_blocked);
mutex_unlock(&c->btree_interior_update_lock);
@ -781,6 +786,7 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
as->new_nodes[as->nr_new_nodes++] = b;
b->will_make_reachable = 1UL|(unsigned long) as;
set_btree_node_will_make_reachable(b);
mutex_unlock(&c->btree_interior_update_lock);
@ -803,6 +809,7 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b)
* xchg() is for synchronization with bch2_btree_complete_write:
*/
v = xchg(&b->will_make_reachable, 0);
clear_btree_node_will_make_reachable(b);
as = (struct btree_update *) (v & ~1UL);
if (!as) {