bcachefs: Delete old deadlock avoidance code

This deletes our old lock ordering based deadlock avoidance code.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Author:    Kent Overstreet, 2022-08-22 15:29:53 -04:00
Committer: Kent Overstreet
Commit:    0d7009d7ca (parent 96d994b37c)
9 changed files with 40 additions and 311 deletions
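For context on the direction of the change: the hunks below keep the waits-for cycle detector (bch2_check_for_deadlock() / bch2_six_check_for_deadlock(), added in the parent commit) and delete the per-lock ordering checks and the lock_fail_* retry paths that the ordering scheme needed, routing everything through plain btree_node_lock() / btree_node_lock_nopath() calls. A rough, self-contained sketch of the cycle-detection idea — toy names only, none of this is bcachefs code — might look like:

```c
#include <stdbool.h>
#include <stdio.h>

struct toy_trans {
	int	holds_lock;	/* index of the lock this trans holds, -1 if none */
	int	wants_lock;	/* index of the lock this trans is blocked on, -1 if none */
};

/* Find the transaction currently holding @lock, if any: */
static struct toy_trans *lock_holder(struct toy_trans *t, int nr, int lock)
{
	for (int i = 0; i < nr; i++)
		if (t[i].holds_lock == lock)
			return &t[i];
	return NULL;
}

/*
 * Follow "I want a lock" -> "who holds it" edges starting from @start;
 * if the walk comes back to @start, blocking would close a cycle and
 * the caller must restart instead of sleeping on the lock.
 */
static bool would_deadlock(struct toy_trans *t, int nr, struct toy_trans *start)
{
	struct toy_trans *cur = start;

	for (int depth = 0; depth < nr; depth++) {
		if (cur->wants_lock < 0)
			return false;

		struct toy_trans *holder = lock_holder(t, nr, cur->wants_lock);

		if (!holder)
			return false;
		if (holder == start)
			return true;
		cur = holder;
	}
	return false;
}

int main(void)
{
	/* trans 0 holds lock 0 and wants lock 1; trans 1 holds lock 1 and wants lock 0: */
	struct toy_trans t[] = {
		{ .holds_lock = 0, .wants_lock = 1 },
		{ .holds_lock = 1, .wants_lock = 0 },
	};

	printf("blocking trans 0 %s\n",
	       would_deadlock(t, 2, &t[0]) ? "would deadlock: restart" : "is safe: sleep");
	return 0;
}
```

The point of detection over avoidance is that a transaction only restarts when blocking would actually close a cycle, not merely because a lock request falls outside a fixed global order.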

View file

@@ -151,8 +151,6 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
/* Cause future lookups for this node to fail: */
b->hash_val = 0;
six_lock_wakeup_all(&b->c.lock);
}
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
@@ -755,16 +753,6 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
return b;
}
static int lock_node_check_fn(struct six_lock *lock, void *p)
{
struct btree *b = container_of(lock, struct btree, c.lock);
const struct bkey_i *k = p;
if (b->hash_val != btree_ptr_hash_val(k))
return BCH_ERR_lock_fail_node_reused;
return 0;
}
static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
{
struct printbuf buf = PRINTBUF;
@@ -886,15 +874,11 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
if (btree_node_read_locked(path, level + 1))
btree_node_unlock(trans, path, level + 1);
ret = btree_node_lock(trans, path, &b->c, k->k.p, level, lock_type,
lock_node_check_fn, (void *) k, trace_ip);
if (unlikely(ret)) {
if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
goto retry;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ERR_PTR(ret);
BUG();
}
ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ERR_PTR(ret);
BUG_ON(ret);
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
b->c.level != level ||
@@ -1000,13 +984,10 @@ struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans,
} else {
lock_node:
ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read);
if (unlikely(ret)) {
if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
goto retry;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ERR_PTR(ret);
BUG();
}
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ERR_PTR(ret);
BUG_ON(ret);
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
b->c.btree_id != btree_id ||

View file

@@ -689,16 +689,6 @@ void bch2_trans_node_reinit_iter(struct btree_trans *trans, struct btree *b)
/* Btree path: traverse, set_pos: */
static int lock_root_check_fn(struct six_lock *lock, void *p)
{
struct btree *b = container_of(lock, struct btree, c.lock);
struct btree **rootp = p;
if (b != *rootp)
return BCH_ERR_lock_fail_root_changed;
return 0;
}
static inline int btree_path_lock_root(struct btree_trans *trans,
struct btree_path *path,
unsigned depth_want,
@@ -730,10 +720,8 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
}
lock_type = __btree_lock_want(path, path->level);
ret = btree_node_lock(trans, path, &b->c, SPOS_MAX,
path->level, lock_type,
lock_root_check_fn, rootp,
trace_ip);
ret = btree_node_lock(trans, path, &b->c,
path->level, lock_type, trace_ip);
if (unlikely(ret)) {
if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed))
continue;
@@ -939,7 +927,7 @@ static int btree_path_traverse_one(struct btree_trans *, struct btree_path *,
static int bch2_btree_path_traverse_all(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
struct btree_path *path, *prev;
struct btree_path *path;
unsigned long trace_ip = _RET_IP_;
int i, ret = 0;
@@ -948,7 +936,6 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
trans->in_traverse_all = true;
retry_all:
prev = NULL;
trans->restarted = 0;
trans_for_each_path(trans, path)
@@ -956,18 +943,6 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
btree_trans_sort_paths(trans);
trans_for_each_path_inorder_reverse(trans, path, i) {
if (prev) {
if (path->btree_id == prev->btree_id &&
path->locks_want < prev->locks_want)
__bch2_btree_path_upgrade(trans, path, prev->locks_want);
else if (!path->locks_want && prev->locks_want)
__bch2_btree_path_upgrade(trans, path, 1);
}
prev = path;
}
bch2_trans_unlock(trans);
cond_resched();
@@ -3026,16 +3001,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
b = READ_ONCE(trans->locking);
if (b) {
path = &trans->paths[trans->locking_path_idx];
prt_printf(out, " locking path %u %c l=%u %c %s:",
trans->locking_path_idx,
path->cached ? 'c' : 'b',
trans->locking_level,
lock_types[trans->locking_wait.lock_want],
bch2_btree_ids[trans->locking_btree_id]);
bch2_bpos_to_text(out, trans->locking_pos);
prt_printf(out, " node ");
prt_printf(out, " locking node ");
bch2_btree_path_node_to_text(out, b);
prt_printf(out, "\n");
}

View file

@@ -398,17 +398,6 @@ static int btree_key_cache_fill(struct btree_trans *trans,
return ret;
}
static int bkey_cached_check_fn(struct six_lock *lock, void *p)
{
struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock);
const struct btree_path *path = p;
if (ck->key.btree_id != path->btree_id &&
bpos_cmp(ck->key.pos, path->pos))
return BCH_ERR_lock_fail_node_reused;
return 0;
}
__flatten
int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
unsigned flags)
@@ -440,16 +429,12 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path
} else {
enum six_lock_type lock_want = __btree_lock_want(path, 0);
ret = btree_node_lock(trans, path, (void *) ck, path->pos, 0,
lock_want,
bkey_cached_check_fn, path, _THIS_IP_);
if (ret) {
if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
goto retry;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto err;
BUG();
}
ret = btree_node_lock(trans, path, (void *) ck, 0,
lock_want, _THIS_IP_);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto err;
BUG_ON(ret);
if (ck->key.btree_id != path->btree_id ||
bpos_cmp(ck->key.pos, path->pos)) {

View file

@@ -92,6 +92,7 @@ static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
int ret;
if (i == g->g) {
trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_);
ret = btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
} else {
i->trans->lock_must_abort = true;
@@ -216,8 +217,10 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
struct btree_path *path;
int ret;
if (trans->lock_must_abort)
if (trans->lock_must_abort) {
trace_and_count(trans->c, trans_restart_would_deadlock, trans, _RET_IP_);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
}
g.nr = 0;
ret = lock_graph_descend(&g, trans, cycle);
@@ -294,7 +297,7 @@ int bch2_six_check_for_deadlock(struct six_lock *lock, void *p)
return bch2_check_for_deadlock(trans, NULL);
}
int __bch2_btree_node_lock_write(struct btree_trans *trans,
int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *path,
struct btree_bkey_cached_common *b,
bool lock_may_not_fail)
{
@@ -311,99 +314,12 @@ int __bch2_btree_node_lock_write(struct btree_trans *trans,
ret = __btree_node_lock_nopath(trans, b, SIX_LOCK_write, lock_may_not_fail);
six_lock_readers_add(&b->lock, readers);
if (ret)
mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_intent);
return ret;
}
static inline bool path_has_read_locks(struct btree_path *path)
{
unsigned l;
for (l = 0; l < BTREE_MAX_DEPTH; l++)
if (btree_node_read_locked(path, l))
return true;
return false;
}
/* Slowpath: */
int __bch2_btree_node_lock(struct btree_trans *trans,
struct btree_path *path,
struct btree_bkey_cached_common *b,
struct bpos pos, unsigned level,
enum six_lock_type type,
six_lock_should_sleep_fn should_sleep_fn, void *p,
unsigned long ip)
{
struct btree_path *linked;
unsigned reason;
/* Check if it's safe to block: */
trans_for_each_path(trans, linked) {
if (!linked->nodes_locked)
continue;
/*
* Can't block taking an intent lock if we have _any_ nodes read
* locked:
*
* - Our read lock blocks another thread with an intent lock on
* the same node from getting a write lock, and thus from
* dropping its intent lock
*
* - And the other thread may have multiple nodes intent locked:
* both the node we want to intent lock, and the node we
* already have read locked - deadlock:
*/
if (type == SIX_LOCK_intent &&
path_has_read_locks(linked)) {
reason = 1;
goto deadlock;
}
if (linked->btree_id != path->btree_id) {
if (linked->btree_id < path->btree_id)
continue;
reason = 3;
goto deadlock;
}
/*
* Within the same btree, non-cached paths come before cached
* paths:
*/
if (linked->cached != path->cached) {
if (!linked->cached)
continue;
reason = 4;
goto deadlock;
}
/*
* Interior nodes must be locked before their descendants: if
* another path has possible descendants locked of the node
* we're about to lock, it must have the ancestors locked too:
*/
if (level > btree_path_highest_level_locked(linked)) {
reason = 5;
goto deadlock;
}
/* Must lock btree nodes in key order: */
if (btree_node_locked(linked, level) &&
bpos_cmp(pos, btree_node_pos(&linked->l[level].b->c)) <= 0) {
reason = 7;
goto deadlock;
}
}
return btree_node_lock_type(trans, path, b, pos, level,
type, should_sleep_fn, p);
deadlock:
trace_and_count(trans->c, trans_restart_would_deadlock, trans, ip, reason, linked, path, &pos);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
}
/* relock */
static inline bool btree_path_get_locks(struct btree_trans *trans,

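The slowpath deleted in the hunk above hard-coded the ordering rules (no blocking on intent locks while holding read locks, btree id order, uncached before cached, ancestors before descendants, ascending key order) and restarted whenever a lock request fell outside them, even if no other thread was actually involved. A loose, self-contained sketch of that style of check — invented names, and a flat list standing in for the transaction's other paths — might look like:

```c
#include <stdbool.h>

struct toy_held_lock {
	unsigned	btree_id;
	bool		cached;
	unsigned	level;
	long long	pos;	/* stand-in for a full struct bpos */
};

/*
 * Return true if it is safe to block waiting for @want given the locks
 * already held; false means "restart the transaction", mirroring the
 * "goto deadlock" cases in the deleted slowpath (simplified: the real
 * code also special-cased read vs. intent locks and compared against
 * the highest locked level per path).
 */
static bool toy_lock_order_ok(const struct toy_held_lock *held, unsigned nr,
			      const struct toy_held_lock *want)
{
	for (unsigned i = 0; i < nr; i++) {
		/* Btrees are taken in order of btree id: */
		if (held[i].btree_id != want->btree_id) {
			if (held[i].btree_id < want->btree_id)
				continue;
			return false;
		}

		/* Within one btree, uncached paths before cached paths: */
		if (held[i].cached != want->cached) {
			if (!held[i].cached)
				continue;
			return false;
		}

		/* Interior nodes before their descendants: */
		if (want->level > held[i].level)
			return false;

		/* Nodes on one level in ascending key order: */
		if (held[i].level == want->level && want->pos <= held[i].pos)
			return false;
	}
	return true;
}

int main(void)
{
	struct toy_held_lock held = { .btree_id = 1, .level = 0, .pos = 100 };
	struct toy_held_lock want = { .btree_id = 1, .level = 0, .pos = 50 };

	/* Wants a node at a lower key than one already locked: must restart. */
	return toy_lock_order_ok(&held, 1, &want) ? 0 : 1;
}
```

With the cycle detector in place none of these rules are needed: an out-of-order lock request is fine as long as the waits-for graph stays acyclic, which is why the hunks here can simply delete them.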
View file

@@ -195,8 +195,8 @@ static inline int __btree_node_lock_nopath(struct btree_trans *trans,
int ret;
trans->lock_may_not_fail = lock_may_not_fail;
trans->locking = b;
trans->lock_must_abort = false;
trans->locking = b;
ret = six_lock_type_waiter(&b->lock, type, &trans->locking_wait,
bch2_six_check_for_deadlock, trans);
@@ -222,26 +222,6 @@ static inline void btree_node_lock_nopath_nofail(struct btree_trans *trans,
BUG_ON(ret);
}
static inline int btree_node_lock_type(struct btree_trans *trans,
struct btree_path *path,
struct btree_bkey_cached_common *b,
struct bpos pos, unsigned level,
enum six_lock_type type,
six_lock_should_sleep_fn should_sleep_fn, void *p)
{
if (six_trylock_type(&b->lock, type))
return 0;
trans->locking_path_idx = path->idx;
trans->locking_pos = pos;
trans->locking_btree_id = path->btree_id;
trans->locking_level = level;
trans->lock_may_not_fail = false;
trans->locking = b;
return six_lock_type_waiter(&b->lock, type, &trans->locking_wait,
bch2_six_check_for_deadlock, trans);
}
/*
* Lock a btree node if we already have it locked on one of our linked
* iterators:
@@ -263,19 +243,11 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
return false;
}
int __bch2_btree_node_lock(struct btree_trans *, struct btree_path *,
struct btree_bkey_cached_common *,
struct bpos, unsigned,
enum six_lock_type,
six_lock_should_sleep_fn, void *,
unsigned long);
static inline int btree_node_lock(struct btree_trans *trans,
struct btree_path *path,
struct btree_bkey_cached_common *b,
struct bpos pos, unsigned level,
unsigned level,
enum six_lock_type type,
six_lock_should_sleep_fn should_sleep_fn, void *p,
unsigned long ip)
{
int ret = 0;
@@ -285,8 +257,7 @@ static inline int btree_node_lock(struct btree_trans *trans,
if (likely(six_trylock_type(&b->lock, type)) ||
btree_node_lock_increment(trans, b, level, type) ||
!(ret = __bch2_btree_node_lock(trans, path, b, pos, level, type,
should_sleep_fn, p, ip))) {
!(ret = btree_node_lock_nopath(trans, b, type))) {
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
path->l[b->level].lock_taken_time = ktime_get_ns();
#endif
@@ -295,15 +266,14 @@ static inline int btree_node_lock(struct btree_trans *trans,
return ret;
}
int __bch2_btree_node_lock_write(struct btree_trans *, struct btree_bkey_cached_common *, bool);
int __bch2_btree_node_lock_write(struct btree_trans *, struct btree_path *,
struct btree_bkey_cached_common *b, bool);
static inline int __btree_node_lock_write(struct btree_trans *trans,
struct btree_path *path,
struct btree_bkey_cached_common *b,
bool lock_may_not_fail)
{
int ret;
EBUG_ON(&path->l[b->level].b->c != b);
EBUG_ON(path->l[b->level].lock_seq != b->lock.state.seq);
EBUG_ON(!btree_node_intent_locked(path, b->level));
@@ -315,13 +285,9 @@ static inline int __btree_node_lock_write(struct btree_trans *trans,
*/
mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_write);
ret = likely(six_trylock_write(&b->lock))
return likely(six_trylock_write(&b->lock))
? 0
: __bch2_btree_node_lock_write(trans, b, lock_may_not_fail);
if (ret)
mark_btree_node_locked_noreset(path, b->level, SIX_LOCK_intent);
return ret;
: __bch2_btree_node_lock_write(trans, path, b, lock_may_not_fail);
}
static inline void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,

View file

@@ -391,10 +391,6 @@ struct btree_trans {
struct list_head list;
u64 last_begin_time;
unsigned locking_path_idx;
struct bpos locking_pos;
u8 locking_btree_id;
u8 locking_level;
u8 lock_may_not_fail;
u8 lock_must_abort;
struct btree_bkey_cached_common *locking;

View file

@@ -796,23 +796,6 @@ static inline void normalize_read_intent_locks(struct btree_trans *trans)
bch2_trans_verify_locks(trans);
}
static inline bool have_conflicting_read_lock(struct btree_trans *trans, struct btree_path *pos)
{
struct btree_path *path;
unsigned i;
trans_for_each_path_inorder(trans, path, i) {
//if (path == pos)
// break;
if (btree_node_read_locked(path, path->level) &&
!bch2_btree_path_upgrade_noupgrade_sibs(trans, path, path->level + 1))
return true;
}
return false;
}
static inline int trans_lock_write(struct btree_trans *trans)
{
struct btree_insert_entry *i;
@@ -822,31 +805,15 @@ static inline int trans_lock_write(struct btree_trans *trans)
if (same_leaf_as_prev(trans, i))
continue;
/*
* six locks are unfair, and read locks block while a thread
* wants a write lock: thus, we need to tell the cycle detector
* we have a write lock _before_ taking the lock:
*/
mark_btree_node_locked_noreset(i->path, i->level, SIX_LOCK_write);
if (!six_trylock_write(&insert_l(i)->b->c.lock)) {
if (have_conflicting_read_lock(trans, i->path))
goto fail;
ret = btree_node_lock_type(trans, i->path,
&insert_l(i)->b->c,
i->path->pos, i->level,
SIX_LOCK_write, NULL, NULL);
BUG_ON(ret);
}
ret = bch2_btree_node_lock_write(trans, i->path, &insert_l(i)->b->c);
if (ret)
goto fail;
bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
}
return 0;
fail:
mark_btree_node_locked_noreset(i->path, i->level, SIX_LOCK_intent);
while (--i >= trans->updates) {
if (same_leaf_as_prev(trans, i))
continue;

View file

@@ -52,7 +52,6 @@
x(BCH_ERR_no_btree_node, no_btree_node_down) \
x(BCH_ERR_no_btree_node, no_btree_node_init) \
x(BCH_ERR_no_btree_node, no_btree_node_cached) \
x(0, lock_fail_node_reused) \
x(0, lock_fail_root_changed) \
x(0, journal_reclaim_would_deadlock) \
x(0, fsck) \

View file

@@ -1012,57 +1012,10 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure,
TP_ARGS(trans, caller_ip, path)
);
TRACE_EVENT(trans_restart_would_deadlock,
DEFINE_EVENT(transaction_event, trans_restart_would_deadlock,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip,
unsigned reason,
struct btree_path *have,
struct btree_path *want,
struct bpos *want_pos),
TP_ARGS(trans, caller_ip, reason,
have, want, want_pos),
TP_STRUCT__entry(
__array(char, trans_fn, 32 )
__field(unsigned long, caller_ip )
__field(u8, in_traverse_all )
__field(u8, reason )
__field(u8, have_btree_id )
__field(u8, have_type )
__field(u8, want_btree_id )
__field(u8, want_type )
TRACE_BPOS_entries(have_pos)
TRACE_BPOS_entries(want_pos)
),
TP_fast_assign(
strlcpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
__entry->caller_ip = caller_ip;
__entry->in_traverse_all = trans->in_traverse_all;
__entry->reason = reason;
__entry->have_btree_id = have->btree_id;
__entry->have_type = have->cached;
__entry->want_btree_id = want->btree_id;
__entry->want_type = want->cached;
TRACE_BPOS_assign(have_pos, have->pos);
TRACE_BPOS_assign(want_pos, *want_pos);
),
TP_printk("%s %pS traverse_all %u because %u have %u:%u %llu:%llu:%u want %u:%u %llu:%llu:%u",
__entry->trans_fn,
(void *) __entry->caller_ip,
__entry->in_traverse_all,
__entry->reason,
__entry->have_btree_id,
__entry->have_type,
__entry->have_pos_inode,
__entry->have_pos_offset,
__entry->have_pos_snapshot,
__entry->want_btree_id,
__entry->want_type,
__entry->want_pos_inode,
__entry->want_pos_offset,
__entry->want_pos_snapshot)
unsigned long caller_ip),
TP_ARGS(trans, caller_ip)
);
DEFINE_EVENT(transaction_event, trans_restart_would_deadlock_recursion_limit,