diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index f60d530313dc..b6f93da37ba0 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -802,6 +802,10 @@ struct bch_fs { struct mutex verify_lock; #endif + struct mutex inode_create_lock; + unsigned unused_inodes_nr; + u64 unused_inodes[64]; + u32 unused_inodes_gens[64]; u64 unused_inode_hint; /* diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index 878419d40992..503ce1920f39 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -34,9 +34,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum, if (!name) new_inode->bi_flags |= BCH_INODE_UNLINKED; - ret = bch2_inode_create(trans, new_inode, - BLOCKDEV_INODE_MAX, 0, - &c->unused_inode_hint); + ret = bch2_inode_create(trans, new_inode); if (ret) goto err; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 631c60bb2fac..c55c164be882 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -361,71 +361,120 @@ static inline u32 bkey_generation(struct bkey_s_c k) } } -int bch2_inode_create(struct btree_trans *trans, - struct bch_inode_unpacked *inode_u, - u64 min, u64 max, u64 *hint) +static int scan_free_inums(struct btree_trans *trans) { - struct bkey_inode_buf *inode_p; + struct bch_fs *c = trans->c; struct btree_iter *iter = NULL; struct bkey_s_c k; - u64 start; - int ret; + u64 min = BLOCKDEV_INODE_MAX; + u64 max = c->opts.inodes_32bit + ? S32_MAX : S64_MAX; + u64 start = max(min, READ_ONCE(c->unused_inode_hint)); + int ret = 0; - if (!max) - max = ULLONG_MAX; - - if (trans->c->opts.inodes_32bit) - max = min_t(u64, max, U32_MAX); - - start = READ_ONCE(*hint); - - if (start >= max || start < min) - start = min; - - inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); - if (IS_ERR(inode_p)) - return PTR_ERR(inode_p); + iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, start), + BTREE_ITER_SLOTS); + if (IS_ERR(iter)) + return PTR_ERR(iter); again: - for_each_btree_key(trans, iter, BTREE_ID_INODES, POS(0, start), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) { + for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) { if (bkey_cmp(iter->pos, POS(0, max)) > 0) break; /* - * There's a potential cache coherency issue with the btree key - * cache code here - we're iterating over the btree, skipping - * that cache. We should never see an empty slot that isn't - * actually empty due to a pending update in the key cache - * because the update that creates the inode isn't done with a - * cached iterator, but - better safe than sorry, check the - * cache before using a slot: + * This doesn't check the btree key cache, but we don't care: + * we have to recheck with an intent lock held on the slot we're + * inserting to anyways: */ - if (k.k->type != KEY_TYPE_inode && - !bch2_btree_key_cache_find(trans->c, BTREE_ID_INODES, iter->pos)) - goto found_slot; + if (k.k->type != KEY_TYPE_inode) { + if (c->unused_inodes_nr < ARRAY_SIZE(c->unused_inodes)) { + c->unused_inodes[c->unused_inodes_nr] = k.k->p.offset; + c->unused_inodes_gens[c->unused_inodes_nr] = bkey_generation(k); + c->unused_inodes_nr++; + } + + if (c->unused_inodes_nr == ARRAY_SIZE(c->unused_inodes)) + goto out; + } } - bch2_trans_iter_put(trans, iter); - - if (ret) - return ret; - - if (start != min) { - /* Retry from start */ + if (!ret && start != min) { + max = start; start = min; + bch2_btree_iter_set_pos(iter, POS(0, start)); goto again; } +out: + c->unused_inode_hint = iter->pos.offset; + bch2_trans_iter_put(trans, iter); + return ret; +} - return -ENOSPC; -found_slot: - *hint = k.k->p.offset; - inode_u->bi_inum = k.k->p.offset; - inode_u->bi_generation = bkey_generation(k); +int bch2_inode_create(struct btree_trans *trans, + struct bch_inode_unpacked *inode_u) +{ + struct bch_fs *c = trans->c; + struct bkey_inode_buf *inode_p; + struct btree_iter *iter = NULL; + struct bkey_s_c k; + u64 inum; + u32 generation; + int ret = 0; + + inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); + if (IS_ERR(inode_p)) + return PTR_ERR(inode_p); + + iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS_MIN, + BTREE_ITER_CACHED| + BTREE_ITER_INTENT); + if (IS_ERR(iter)) + return PTR_ERR(iter); +retry: + if (!mutex_trylock(&c->inode_create_lock)) { + bch2_trans_unlock(trans); + mutex_lock(&c->inode_create_lock); + if (!bch2_trans_relock(trans)) { + mutex_unlock(&c->inode_create_lock); + ret = -EINTR; + goto err; + } + } + + if (!c->unused_inodes_nr) + ret = scan_free_inums(trans); + if (!ret && !c->unused_inodes_nr) + ret = -ENOSPC; + if (!ret) { + --c->unused_inodes_nr; + inum = c->unused_inodes[c->unused_inodes_nr]; + generation = c->unused_inodes_gens[c->unused_inodes_nr]; + } + + mutex_unlock(&c->inode_create_lock); + + if (ret) + goto err; + + bch2_btree_iter_set_pos(iter, POS(0, inum)); + + /* Recheck that the slot is free with an intent lock held: */ + k = bch2_btree_iter_peek_cached(iter); + ret = bkey_err(k); + if (ret) + goto err; + + if (k.k->type == KEY_TYPE_inode) + goto retry; + + inode_u->bi_inum = inum; + inode_u->bi_generation = generation; bch2_inode_pack(inode_p, inode_u); bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); +err: bch2_trans_iter_put(trans, iter); - return 0; + return ret; } int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index bb759a46dc41..5743be2307f3 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -60,9 +60,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *, uid_t, gid_t, umode_t, dev_t, struct bch_inode_unpacked *); -int bch2_inode_create(struct btree_trans *, - struct bch_inode_unpacked *, - u64, u64, u64 *); +int bch2_inode_create(struct btree_trans *, struct bch_inode_unpacked *); int bch2_inode_rm(struct bch_fs *, u64); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 15e760d8dd4d..b8736a822630 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -703,6 +703,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) seqcount_init(&c->usage_lock); + mutex_init(&c->inode_create_lock); + c->copy_gc_enabled = 1; c->rebalance.enabled = 1; c->promote_whole_extents = true;