From 4d8100daa9bb6c243cd39be0956005a76eec36ee Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 15 Mar 2019 18:20:46 -0400 Subject: [PATCH] bcachefs: Allocate fs_usage in do_btree_insert_at() Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 5 +- fs/bcachefs/btree_update_interior.c | 13 ++-- fs/bcachefs/btree_update_leaf.c | 44 +++++++++--- fs/bcachefs/buckets.c | 102 ++++++++++++++++++++-------- fs/bcachefs/buckets.h | 14 ++-- fs/bcachefs/extents.c | 5 +- fs/bcachefs/replicas.c | 56 ++++++++++----- fs/bcachefs/replicas.h | 2 + fs/bcachefs/super.c | 4 +- 9 files changed, 170 insertions(+), 75 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 0b495dd32f67..27ffecb912a3 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -635,7 +635,10 @@ struct bch_fs { struct percpu_rw_semaphore mark_lock; struct bch_fs_usage __percpu *usage[2]; - struct bch_fs_usage __percpu *usage_scratch; + + /* single element mempool: */ + struct mutex usage_scratch_lock; + struct bch_fs_usage *usage_scratch; /* * When we invalidate buckets, we use both the priority and the amount diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 7ccf2f935701..31c1474cd494 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1076,8 +1076,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b) mutex_lock(&c->btree_interior_update_lock); percpu_down_read(&c->mark_lock); - preempt_disable(); - fs_usage = bch2_fs_usage_get_scratch(c); + fs_usage = bch2_fs_usage_scratch_get(c); bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key), true, 0, @@ -1090,7 +1089,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b) fs_usage); bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res); - preempt_enable(); + bch2_fs_usage_scratch_put(c, fs_usage); percpu_up_read(&c->mark_lock); mutex_unlock(&c->btree_interior_update_lock); } @@ -1171,8 +1170,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b mutex_lock(&c->btree_interior_update_lock); percpu_down_read(&c->mark_lock); - preempt_disable(); - fs_usage = bch2_fs_usage_get_scratch(c); + fs_usage = bch2_fs_usage_scratch_get(c); bch2_mark_key_locked(c, bkey_i_to_s_c(insert), true, 0, @@ -1193,7 +1191,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res); - preempt_enable(); + bch2_fs_usage_scratch_put(c, fs_usage); percpu_up_read(&c->mark_lock); mutex_unlock(&c->btree_interior_update_lock); @@ -1987,7 +1985,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, mutex_lock(&c->btree_interior_update_lock); percpu_down_read(&c->mark_lock); - fs_usage = bch2_fs_usage_get_scratch(c); + fs_usage = bch2_fs_usage_scratch_get(c); bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i), true, 0, @@ -1998,6 +1996,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, fs_usage); bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res); + bch2_fs_usage_scratch_put(c, fs_usage); percpu_up_read(&c->mark_lock); mutex_unlock(&c->btree_interior_update_lock); diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 42fdb6c2963a..5349790547f4 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -269,8 +269,6 @@ static void btree_insert_key_leaf(struct btree_trans *trans, int old_live_u64s = b->nr.live_u64s; int live_u64s_added, u64s_added; - bch2_mark_update(trans, insert); - if (!btree_node_is_extents(b)) bch2_insert_fixup_key(trans, insert); else @@ -499,11 +497,6 @@ btree_key_can_insert(struct btree_trans *trans, if (unlikely(btree_node_fake(b))) return BTREE_INSERT_BTREE_NODE_FULL; - if (!bch2_bkey_replicas_marked(c, - bkey_i_to_s_c(insert->k), - true)) - return BTREE_INSERT_NEED_MARK_REPLICAS; - ret = !btree_node_is_extents(b) ? BTREE_INSERT_OK : bch2_extent_can_insert(trans, insert, u64s); @@ -555,6 +548,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans, struct btree_insert_entry **stopped_at) { struct bch_fs *c = trans->c; + struct bch_fs_usage *fs_usage = NULL; struct btree_insert_entry *i; struct btree_iter *linked; int ret; @@ -562,12 +556,29 @@ static inline int do_btree_insert_at(struct btree_trans *trans, trans_for_each_update_iter(trans, i) BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK); + trans_for_each_update_iter(trans, i) { + if (i->deferred || + !btree_node_type_needs_gc(i->iter->btree_id)) + continue; + + if (!fs_usage) { + percpu_down_read(&c->mark_lock); + fs_usage = bch2_fs_usage_scratch_get(c); + } + + if (!bch2_bkey_replicas_marked_locked(c, + bkey_i_to_s_c(i->k), true)) { + ret = BTREE_INSERT_NEED_MARK_REPLICAS; + goto out; + } + } + btree_trans_lock_write(c, trans); if (race_fault()) { ret = -EINTR; trans_restart(" (race)"); - goto out; + goto out_unlock; } /* @@ -577,7 +588,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans, */ ret = btree_trans_check_can_insert(trans, stopped_at); if (ret) - goto out; + goto out_unlock; /* * Don't get journal reservation until after we know insert will @@ -585,7 +596,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans, */ ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_NONBLOCK); if (ret) - goto out; + goto out_unlock; if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) { if (journal_seq_verify(c)) @@ -610,14 +621,25 @@ static inline int do_btree_insert_at(struct btree_trans *trans, } } + trans_for_each_update_iter(trans, i) + bch2_mark_update(trans, i, fs_usage); + if (fs_usage) + bch2_trans_fs_usage_apply(trans, fs_usage); + trans_for_each_update(trans, i) do_btree_insert_one(trans, i); -out: +out_unlock: BUG_ON(ret && (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) && trans->journal_res.ref); btree_trans_unlock_write(trans); +out: + if (fs_usage) { + bch2_fs_usage_scratch_put(c, fs_usage); + percpu_up_read(&c->mark_lock); + } + bch2_journal_res_put(&c->journal, &trans->journal_res); return ret; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 3744d55b8495..2fbcd85d9e75 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -144,6 +144,37 @@ void bch2_fs_usage_initialize(struct bch_fs *c) percpu_up_write(&c->mark_lock); } +void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage) +{ + if (fs_usage == c->usage_scratch) + mutex_unlock(&c->usage_scratch_lock); + else + kfree(fs_usage); +} + +struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c) +{ + struct bch_fs_usage *ret; + unsigned bytes = fs_usage_u64s(c) * sizeof(u64); + + ret = kzalloc(bytes, GFP_NOWAIT); + if (ret) + return ret; + + if (mutex_trylock(&c->usage_scratch_lock)) + goto out_pool; + + ret = kzalloc(bytes, GFP_NOFS); + if (ret) + return ret; + + mutex_lock(&c->usage_scratch_lock); +out_pool: + ret = c->usage_scratch; + memset(ret, 0, bytes); + return ret; +} + struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca) { struct bch_dev_usage ret; @@ -906,31 +937,39 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, unsigned journal_seq, unsigned flags, bool gc) { + int ret = 0; + + preempt_disable(); + if (!fs_usage || gc) fs_usage = this_cpu_ptr(c->usage[gc]); switch (k.k->type) { case KEY_TYPE_alloc: - return bch2_mark_alloc(c, k, inserting, + ret = bch2_mark_alloc(c, k, inserting, fs_usage, journal_seq, flags, gc); + break; case KEY_TYPE_btree_ptr: - return bch2_mark_extent(c, k, inserting + ret = bch2_mark_extent(c, k, inserting ? c->opts.btree_node_size : -c->opts.btree_node_size, BCH_DATA_BTREE, fs_usage, journal_seq, flags, gc); + break; case KEY_TYPE_extent: - return bch2_mark_extent(c, k, sectors, BCH_DATA_USER, + ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER, fs_usage, journal_seq, flags, gc); + break; case KEY_TYPE_stripe: - return bch2_mark_stripe(c, k, inserting, + ret = bch2_mark_stripe(c, k, inserting, fs_usage, journal_seq, flags, gc); + break; case KEY_TYPE_inode: if (inserting) fs_usage->nr_inodes++; else fs_usage->nr_inodes--; - return 0; + break; case KEY_TYPE_reservation: { unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; @@ -940,11 +979,13 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, fs_usage->reserved += sectors; fs_usage->persistent_reserved[replicas - 1] += sectors; - return 0; + break; } - default: - return 0; } + + preempt_enable(); + + return ret; } int bch2_mark_key_locked(struct bch_fs *c, @@ -976,25 +1017,19 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, } void bch2_mark_update(struct btree_trans *trans, - struct btree_insert_entry *insert) + struct btree_insert_entry *insert, + struct bch_fs_usage *fs_usage) { struct bch_fs *c = trans->c; struct btree_iter *iter = insert->iter; struct btree *b = iter->l[0].b; struct btree_node_iter node_iter = iter->l[0].iter; - struct bch_fs_usage *fs_usage; struct gc_pos pos = gc_pos_btree_node(b); struct bkey_packed *_k; - u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; - static int warned_disk_usage = 0; if (!btree_node_type_needs_gc(iter->btree_id)) return; - percpu_down_read(&c->mark_lock); - preempt_disable(); - fs_usage = bch2_fs_usage_get_scratch(c); - if (!(trans->flags & BTREE_INSERT_NOMARK)) bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true, bpos_min(insert->k->k.p, b->key.k.p).offset - @@ -1047,16 +1082,32 @@ void bch2_mark_update(struct btree_trans *trans, bch2_btree_node_iter_advance(&node_iter, b); } +} - if (bch2_fs_usage_apply(c, fs_usage, trans->disk_res) && - !warned_disk_usage && - !xchg(&warned_disk_usage, 1)) { - char buf[200]; +void bch2_trans_fs_usage_apply(struct btree_trans *trans, + struct bch_fs_usage *fs_usage) +{ + struct bch_fs *c = trans->c; + struct btree_insert_entry *i; + static int warned_disk_usage = 0; + u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; + char buf[200]; - pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors); + if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res) || + warned_disk_usage || + xchg(&warned_disk_usage, 1)) + return; + + pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors); + + trans_for_each_update_iter(trans, i) { + struct btree_iter *iter = i->iter; + struct btree *b = iter->l[0].b; + struct btree_node_iter node_iter = iter->l[0].iter; + struct bkey_packed *_k; pr_err("while inserting"); - bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert->k)); + bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); pr_err("%s", buf); pr_err("overlapping with"); @@ -1069,8 +1120,8 @@ void bch2_mark_update(struct btree_trans *trans, k = bkey_disassemble(b, _k, &unpacked); if (btree_node_is_extents(b) - ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0 - : bkey_cmp(insert->k->k.p, k.k->p)) + ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0 + : bkey_cmp(i->k->k.p, k.k->p)) break; bch2_bkey_val_to_text(&PBUF(buf), c, k); @@ -1079,9 +1130,6 @@ void bch2_mark_update(struct btree_trans *trans, bch2_btree_node_iter_advance(&node_iter, b); } } - - preempt_enable(); - percpu_up_read(&c->mark_lock); } /* Disk reservations: */ diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index fc2c212392b6..e34c9d24dc38 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -219,13 +219,8 @@ static inline unsigned fs_usage_u64s(struct bch_fs *c) READ_ONCE(c->replicas.nr); } -static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c) -{ - struct bch_fs_usage *ret = this_cpu_ptr(c->usage_scratch); - - memset(ret, 0, fs_usage_u64s(c) * sizeof(u64)); - return ret; -} +void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *); +struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *); struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *); @@ -256,10 +251,13 @@ int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c, int bch2_mark_key(struct bch_fs *, struct bkey_s_c, bool, s64, struct gc_pos, struct bch_fs_usage *, u64, unsigned); -void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *); int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *, struct disk_reservation *); +void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *, + struct bch_fs_usage *); +void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *); + /* disk reservations: */ void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 80531017b237..194b8d6da1bb 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1190,11 +1190,12 @@ void bch2_insert_fixup_extent(struct btree_trans *trans, if (s.deleting) tmp.k.k.type = KEY_TYPE_discard; - +#if 0 + /* disabled due to lock recursion - mark_lock: */ if (debug_check_bkeys(c)) bch2_bkey_debugcheck(c, iter->l[0].b, bkey_i_to_s_c(&tmp.k)); - +#endif EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size); extent_bset_insert(c, iter, &tmp.k); diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 72592df9afc0..b66217989b71 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -207,22 +207,29 @@ static bool __replicas_has_entry(struct bch_replicas_cpu *r, return __replicas_entry_idx(r, search) >= 0; } +static bool bch2_replicas_marked_locked(struct bch_fs *c, + struct bch_replicas_entry *search, + bool check_gc_replicas) +{ + if (!search->nr_devs) + return true; + + verify_replicas_entry_sorted(search); + + return __replicas_has_entry(&c->replicas, search) && + (!check_gc_replicas || + likely((!c->replicas_gc.entries)) || + __replicas_has_entry(&c->replicas_gc, search)); +} + bool bch2_replicas_marked(struct bch_fs *c, struct bch_replicas_entry *search, bool check_gc_replicas) { bool marked; - if (!search->nr_devs) - return true; - - verify_replicas_entry_sorted(search); - percpu_down_read(&c->mark_lock); - marked = __replicas_has_entry(&c->replicas, search) && - (!check_gc_replicas || - likely((!c->replicas_gc.entries)) || - __replicas_has_entry(&c->replicas_gc, search)); + marked = bch2_replicas_marked_locked(c, search, check_gc_replicas); percpu_up_read(&c->mark_lock); return marked; @@ -263,7 +270,7 @@ static int replicas_table_update(struct bch_fs *c, struct bch_replicas_cpu *new_r) { struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL }; - struct bch_fs_usage __percpu *new_scratch = NULL; + struct bch_fs_usage *new_scratch = NULL; unsigned bytes = sizeof(struct bch_fs_usage) + sizeof(u64) * new_r->nr; int ret = -ENOMEM; @@ -273,8 +280,7 @@ static int replicas_table_update(struct bch_fs *c, (c->usage[1] && !(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO))) || - !(new_scratch = __alloc_percpu_gfp(bytes, sizeof(u64), - GFP_NOIO))) + !(new_scratch = kmalloc(bytes, GFP_NOIO))) goto err; if (c->usage[0]) @@ -290,7 +296,7 @@ static int replicas_table_update(struct bch_fs *c, swap(c->replicas, *new_r); ret = 0; err: - free_percpu(new_scratch); + kfree(new_scratch); free_percpu(new_usage[1]); free_percpu(new_usage[0]); return ret; @@ -390,9 +396,9 @@ int bch2_mark_replicas(struct bch_fs *c, : bch2_mark_replicas_slowpath(c, r); } -bool bch2_bkey_replicas_marked(struct bch_fs *c, - struct bkey_s_c k, - bool check_gc_replicas) +bool bch2_bkey_replicas_marked_locked(struct bch_fs *c, + struct bkey_s_c k, + bool check_gc_replicas) { struct bch_replicas_padded search; struct bch_devs_list cached = bch2_bkey_cached_devs(k); @@ -401,13 +407,27 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c, for (i = 0; i < cached.nr; i++) { bch2_replicas_entry_cached(&search.e, cached.devs[i]); - if (!bch2_replicas_marked(c, &search.e, check_gc_replicas)) + if (!bch2_replicas_marked_locked(c, &search.e, + check_gc_replicas)) return false; } bkey_to_replicas(&search.e, k); - return bch2_replicas_marked(c, &search.e, check_gc_replicas); + return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas); +} + +bool bch2_bkey_replicas_marked(struct bch_fs *c, + struct bkey_s_c k, + bool check_gc_replicas) +{ + bool marked; + + percpu_down_read(&c->mark_lock); + marked = bch2_bkey_replicas_marked_locked(c, k, check_gc_replicas); + percpu_up_read(&c->mark_lock); + + return marked; } int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k) diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h index d1457c786bb5..0777e7056d55 100644 --- a/fs/bcachefs/replicas.h +++ b/fs/bcachefs/replicas.h @@ -26,6 +26,8 @@ bool bch2_replicas_marked(struct bch_fs *, int bch2_mark_replicas(struct bch_fs *, struct bch_replicas_entry *); +bool bch2_bkey_replicas_marked_locked(struct bch_fs *, + struct bkey_s_c, bool); bool bch2_bkey_replicas_marked(struct bch_fs *, struct bkey_s_c, bool); int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 4f627e91f041..b1eb70556f75 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -404,7 +404,7 @@ static void bch2_fs_free(struct bch_fs *c) bch2_io_clock_exit(&c->io_clock[READ]); bch2_fs_compress_exit(c); percpu_free_rwsem(&c->mark_lock); - free_percpu(c->usage_scratch); + kfree(c->usage_scratch); free_percpu(c->usage[0]); free_percpu(c->pcpu); mempool_exit(&c->btree_iters_pool); @@ -572,6 +572,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) mutex_init(&c->btree_reserve_cache_lock); mutex_init(&c->btree_interior_update_lock); + mutex_init(&c->usage_scratch_lock); + mutex_init(&c->bio_bounce_pages_lock); bio_list_init(&c->btree_write_error_list);