bcachefs: Tweak btree key cache shrinker so it actually frees

Freeing key cache items is a multi stage process; we need to wait for an
SRCU grace period to elapse, and we handle this ourselves - partially to
avoid callback overhead, but primarily so that when allocating we can
first allocate from the freed items waiting for an SRCU grace period.

Previously, the shrinker was counting the items on the 'waiting for SRCU
grace period' lists as items being scanned, but this meant that too many
items waiting for an SRCU grace period could prevent it from doing any
work at all.

After this, we're seeing that items skipped due to the accessed bit are
the main cause of the shrinker not making any progress, and we actually
want the key cache shrinker to run quite aggressively because reclaimed
items will still generally be found (more compactly) in the btree node
cache - so we also tweak the shrinker to not count those against
nr_to_scan.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2024-04-20 15:35:40 -04:00
parent 6e4d9bd110
commit adfe9357c3
1 changed files with 4 additions and 15 deletions

View File

@ -842,8 +842,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
* Newest freed entries are at the end of the list - once we hit one
* that's too new to be freed, we can bail out:
*/
scanned += bc->nr_freed_nonpcpu;
list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
@ -857,11 +855,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
bc->nr_freed_nonpcpu--;
}
if (scanned >= nr)
goto out;
scanned += bc->nr_freed_pcpu;
list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
@ -875,9 +868,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
bc->nr_freed_pcpu--;
}
if (scanned >= nr)
goto out;
rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
if (bc->shrink_iter >= tbl->size)
@ -893,12 +883,12 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
ck = container_of(pos, struct bkey_cached, hash);
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
goto next;
if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
} else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) {
clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
else if (bkey_cached_lock_for_evict(ck)) {
goto next;
} else if (bkey_cached_lock_for_evict(ck)) {
bkey_cached_evict(bc, ck);
bkey_cached_free(bc, ck);
}
@ -916,7 +906,6 @@ next:
} while (scanned < nr && bc->shrink_iter != start);
rcu_read_unlock();
out:
memalloc_nofs_restore(flags);
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
mutex_unlock(&bc->lock);