From 134915f3d38d830374603b84a9fe2e280f4814ed Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 21 Mar 2019 22:19:57 -0400 Subject: [PATCH] bcachefs: Go rw lazily Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 32 ++------ fs/bcachefs/bcachefs.h | 14 +--- fs/bcachefs/btree_update.h | 2 + fs/bcachefs/btree_update_leaf.c | 19 ++++- fs/bcachefs/fs.c | 10 +-- fs/bcachefs/fsck.c | 16 ++-- fs/bcachefs/journal.c | 2 - fs/bcachefs/journal_io.c | 2 + fs/bcachefs/recovery.c | 25 +++---- fs/bcachefs/super-io.c | 11 +-- fs/bcachefs/super-io.h | 3 +- fs/bcachefs/super.c | 128 +++++++++++++++++++++++--------- fs/bcachefs/super.h | 4 +- fs/bcachefs/sysfs.c | 4 +- 14 files changed, 155 insertions(+), 117 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index c11136506352..da25a1ed5206 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -345,6 +345,7 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k) ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW| BTREE_INSERT_JOURNAL_REPLAY| BTREE_INSERT_NOMARK); err: @@ -1626,7 +1627,7 @@ static bool bch2_fs_allocator_start_fast(struct bch_fs *c) return ret; } -static int __bch2_fs_allocator_start(struct bch_fs *c) +int bch2_fs_allocator_start(struct bch_fs *c) { struct bch_dev *ca; unsigned dev_iter; @@ -1635,6 +1636,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c) long bu; int ret = 0; + if (!test_alloc_startup(c) && + bch2_fs_allocator_start_fast(c)) + return 0; + pr_debug("not enough empty buckets; scanning for reclaimable buckets"); /* @@ -1709,31 +1714,6 @@ static int __bch2_fs_allocator_start(struct bch_fs *c) return ret; } -int bch2_fs_allocator_start(struct bch_fs *c) -{ - struct bch_dev *ca; - unsigned i; - int ret; - - ret = bch2_fs_allocator_start_fast(c) ? 0 : - __bch2_fs_allocator_start(c); - if (ret) - return ret; - - set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags); - - for_each_rw_member(ca, c, i) { - ret = bch2_dev_allocator_start(ca); - if (ret) { - percpu_ref_put(&ca->io_ref); - return ret; - } - } - - set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); - return 0; -} - void bch2_fs_allocator_background_init(struct bch_fs *c) { spin_lock_init(&c->freelist_lock); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 5a9b776558f6..5eae18e92bd5 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -486,6 +486,7 @@ enum { BCH_FS_INITIAL_GC_DONE, BCH_FS_FSCK_DONE, BCH_FS_STARTED, + BCH_FS_RW, /* shutdown: */ BCH_FS_EMERGENCY_RO, @@ -510,13 +511,6 @@ struct btree_debug { struct dentry *failed; }; -enum bch_fs_state { - BCH_FS_STARTING = 0, - BCH_FS_STOPPING, - BCH_FS_RO, - BCH_FS_RW, -}; - struct bch_fs_pcpu { u64 sectors_available; }; @@ -538,7 +532,6 @@ struct bch_fs { /* ro/rw, add/remove devices: */ struct mutex state_lock; - enum bch_fs_state state; /* Counts outstanding writes, for clean transition to read-only */ struct percpu_ref writes; @@ -800,11 +793,6 @@ static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) #endif } -static inline bool bch2_fs_running(struct bch_fs *c) -{ - return c->state == BCH_FS_RO || c->state == BCH_FS_RW; -} - static inline unsigned bucket_bytes(const struct bch_dev *ca) { return ca->mi.bucket_size << 9; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 4d7cef75a017..879e7ae39586 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -38,6 +38,7 @@ enum { __BTREE_INSERT_NOUNLOCK, __BTREE_INSERT_NOFAIL, __BTREE_INSERT_NOCHECK_RW, + __BTREE_INSERT_LAZY_RW, __BTREE_INSERT_USE_RESERVE, __BTREE_INSERT_USE_ALLOC_RESERVE, __BTREE_INSERT_JOURNAL_REPLAY, @@ -64,6 +65,7 @@ enum { #define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL) #define BTREE_INSERT_NOCHECK_RW (1 << __BTREE_INSERT_NOCHECK_RW) +#define BTREE_INSERT_LAZY_RW (1 << __BTREE_INSERT_LAZY_RW) /* for copygc, or when merging btree nodes */ #define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE) diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index a05fd7104a72..9c1ca9ad3ead 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -849,8 +849,23 @@ int bch2_trans_commit(struct btree_trans *trans, btree_insert_entry_checks(trans, i); if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) && - !percpu_ref_tryget(&c->writes))) - return -EROFS; + !percpu_ref_tryget(&c->writes))) { + if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW))) + return -EROFS; + + btree_trans_unlock(trans); + + ret = bch2_fs_read_write_early(c); + if (ret) + return ret; + + percpu_ref_get(&c->writes); + + if (!btree_trans_relock(trans)) { + ret = -EINTR; + goto err; + } + } retry: ret = bch2_trans_journal_preres_get(trans); if (ret) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index dc55d36ecfd5..2f01d97470b1 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1616,7 +1616,7 @@ static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * cons mutex_lock(&c->state_lock); - if (!bch2_fs_running(c)) { + if (!test_bit(BCH_FS_STARTED, &c->flags)) { mutex_unlock(&c->state_lock); closure_put(&c->cl); pr_err("err mounting %s: incomplete filesystem", dev_name); @@ -1672,8 +1672,6 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) return ret; if (opts.read_only != c->opts.read_only) { - const char *err = NULL; - mutex_lock(&c->state_lock); if (opts.read_only) { @@ -1681,9 +1679,9 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data) sb->s_flags |= SB_RDONLY; } else { - err = bch2_fs_read_write(c); - if (err) { - bch_err(c, "error going rw: %s", err); + ret = bch2_fs_read_write(c); + if (ret) { + bch_err(c, "error going rw: %i", ret); return -EINVAL; } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 2561773cd6dc..439f758d8178 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -174,7 +174,8 @@ static int hash_redo_key(const struct bch_hash_desc desc, bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode, tmp, BCH_HASH_SET_MUST_CREATE); ret = bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL); + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); err: kfree(tmp); return ret; @@ -204,7 +205,8 @@ static int fsck_hash_delete_at(const struct bch_hash_desc desc, ret = bch2_hash_delete_at(&trans, desc, info, iter) ?: bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_ATOMIC| - BTREE_INSERT_NOFAIL); + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); err: if (ret == -EINTR) goto retry; @@ -365,7 +367,9 @@ static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h, buf, strlen(buf), d->v.d_name, len)) { bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &d->k_i)); - ret = bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL); + ret = bch2_trans_commit(trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); if (ret) goto err; @@ -630,7 +634,8 @@ static int check_dirents(struct bch_fs *c) BTREE_INSERT_ENTRY(iter, &n->k_i)); ret = bch2_trans_commit(&trans, NULL, NULL, - BTREE_INSERT_NOFAIL); + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); kfree(n); if (ret) goto err; @@ -1268,7 +1273,8 @@ static int check_inode(struct btree_trans *trans, bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &p.inode.k_i)); ret = bch2_trans_commit(trans, NULL, NULL, - BTREE_INSERT_NOFAIL); + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW); if (ret && ret != -EINTR) bch_err(c, "error in fs gc: error %i " "updating inode", ret); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 64f9c5740ec8..c0dcc0ff65ce 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1027,8 +1027,6 @@ void bch2_fs_journal_start(struct journal *j) * only have to go down with the next journal entry we write: */ bch2_journal_seq_blacklist_write(j); - - queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0); } /* init/exit: */ diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index d20672a37fd3..1bb627c05188 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -861,6 +861,7 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k) ret = bch2_trans_commit(&trans, &disk_res, NULL, BTREE_INSERT_ATOMIC| BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW| BTREE_INSERT_JOURNAL_REPLAY); } while ((!ret || ret == -EINTR) && bkey_cmp(k->k.p, iter->pos)); @@ -906,6 +907,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) ret = bch2_btree_insert(c, entry->btree_id, k, NULL, NULL, BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW| BTREE_INSERT_JOURNAL_REPLAY| BTREE_INSERT_NOMARK); break; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d7be535f3cc1..f7e3060428cf 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -119,8 +119,13 @@ static int verify_superblock_clean(struct bch_fs *c, if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, "superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown", le64_to_cpu(clean->journal_seq), - le64_to_cpu(j->seq))) - bch2_fs_mark_clean(c, false); + le64_to_cpu(j->seq))) { + ret = bch2_fs_mark_dirty(c); + if (ret) { + bch_err(c, "error going rw"); + return ret; + } + } mustfix_fsck_err_on(j->read_clock != clean->read_clock, c, "superblock read clock doesn't match journal after clean shutdown"); @@ -331,13 +336,6 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.noreplay) goto out; - /* - * Mark dirty before journal replay, fsck: - * XXX: after a clean shutdown, this could be done lazily only when fsck - * finds an error - */ - bch2_fs_mark_clean(c, false); - /* * bch2_fs_journal_start() can't happen sooner, or btree_gc_finish() * will give spurious errors about oldest_gen > bucket_gen - @@ -345,11 +343,6 @@ int bch2_fs_recovery(struct bch_fs *c) */ bch2_fs_journal_start(&c->journal); - err = "error starting allocator"; - ret = bch2_fs_allocator_start(c); - if (ret) - goto err; - bch_verbose(c, "starting journal replay:"); err = "journal replay failed"; ret = bch2_journal_replay(c, &journal); @@ -436,8 +429,8 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_fs_journal_start(&c->journal); bch2_journal_set_replay_done(&c->journal); - err = "error starting allocator"; - ret = bch2_fs_allocator_start(c); + err = "error going read write"; + ret = bch2_fs_read_write_early(c); if (ret) goto err; diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index c89fe5d630e4..dec6a737f44f 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -886,7 +886,7 @@ void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write) bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write); } -static void bch2_fs_mark_dirty(struct bch_fs *c) +int bch2_fs_mark_dirty(struct bch_fs *c) { mutex_lock(&c->sb_lock); if (BCH_SB_CLEAN(c->disk_sb.sb) || @@ -896,6 +896,8 @@ static void bch2_fs_mark_dirty(struct bch_fs *c) bch2_write_super(c); } mutex_unlock(&c->sb_lock); + + return 0; } struct jset_entry * @@ -997,17 +999,12 @@ bch2_journal_super_entries_add_common(struct bch_fs *c, return entry; } -void bch2_fs_mark_clean(struct bch_fs *c, bool clean) +void bch2_fs_mark_clean(struct bch_fs *c) { struct bch_sb_field_clean *sb_clean; struct jset_entry *entry; unsigned u64s; - if (!clean) { - bch2_fs_mark_dirty(c); - return; - } - mutex_lock(&c->sb_lock); if (BCH_SB_CLEAN(c->disk_sb.sb)) goto out; diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index 498a9e887d4e..afc92d14c254 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -141,7 +141,8 @@ bch2_journal_super_entries_add_common(struct bch_fs *, void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int); -void bch2_fs_mark_clean(struct bch_fs *, bool); +int bch2_fs_mark_dirty(struct bch_fs *); +void bch2_fs_mark_clean(struct bch_fs *); void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, struct bch_sb_field *); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index e8242bb70b93..5364b95cfec9 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -258,8 +258,10 @@ static void bch2_writes_disabled(struct percpu_ref *writes) void bch2_fs_read_only(struct bch_fs *c) { - if (c->state == BCH_FS_RO) + if (!test_bit(BCH_FS_RW, &c->flags)) { + cancel_delayed_work_sync(&c->journal.reclaim_work); return; + } BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags)); @@ -301,10 +303,9 @@ void bch2_fs_read_only(struct bch_fs *c) !test_bit(BCH_FS_ERROR, &c->flags) && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) && test_bit(BCH_FS_STARTED, &c->flags)) - bch2_fs_mark_clean(c, true); + bch2_fs_mark_clean(c); - if (c->state != BCH_FS_STOPPING) - c->state = BCH_FS_RO; + clear_bit(BCH_FS_RW, &c->flags); } static void bch2_fs_read_only_work(struct work_struct *work) @@ -333,55 +334,106 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c) return ret; } -const char *bch2_fs_read_write(struct bch_fs *c) +static int bch2_fs_read_write_late(struct bch_fs *c) { struct bch_dev *ca; - const char *err = NULL; unsigned i; + int ret; - if (c->state == BCH_FS_RW) - return NULL; + ret = bch2_gc_thread_start(c); + if (ret) { + bch_err(c, "error starting gc thread"); + return ret; + } - bch2_fs_mark_clean(c, false); + for_each_rw_member(ca, c, i) { + ret = bch2_copygc_start(c, ca); + if (ret) { + bch_err(c, "error starting copygc threads"); + percpu_ref_put(&ca->io_ref); + return ret; + } + } + + ret = bch2_rebalance_start(c); + if (ret) { + bch_err(c, "error starting rebalance thread"); + return ret; + } + + schedule_delayed_work(&c->pd_controllers_update, 5 * HZ); + + return 0; +} + +static int __bch2_fs_read_write(struct bch_fs *c, bool early) +{ + struct bch_dev *ca; + unsigned i; + int ret; + + if (test_bit(BCH_FS_RW, &c->flags)) + return 0; + + ret = bch2_fs_mark_dirty(c); + if (ret) + goto err; for_each_rw_member(ca, c, i) bch2_dev_allocator_add(c, ca); bch2_recalc_capacity(c); - err = "error starting allocator thread"; - for_each_rw_member(ca, c, i) - if (bch2_dev_allocator_start(ca)) { + if (!test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags)) { + ret = bch2_fs_allocator_start(c); + if (ret) { + bch_err(c, "error initializing allocator"); + goto err; + } + + set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags); + } + + for_each_rw_member(ca, c, i) { + ret = bch2_dev_allocator_start(ca); + if (ret) { + bch_err(c, "error starting allocator threads"); percpu_ref_put(&ca->io_ref); goto err; } + } set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); - err = "error starting btree GC thread"; - if (bch2_gc_thread_start(c)) - goto err; - - err = "error starting copygc thread"; - for_each_rw_member(ca, c, i) - if (bch2_copygc_start(c, ca)) { - percpu_ref_put(&ca->io_ref); + if (!early) { + ret = bch2_fs_read_write_late(c); + if (ret) goto err; - } + } - err = "error starting rebalance thread"; - if (bch2_rebalance_start(c)) - goto err; + percpu_ref_reinit(&c->writes); + set_bit(BCH_FS_RW, &c->flags); - schedule_delayed_work(&c->pd_controllers_update, 5 * HZ); - - if (c->state != BCH_FS_STARTING) - percpu_ref_reinit(&c->writes); - - c->state = BCH_FS_RW; - return NULL; + queue_delayed_work(c->journal_reclaim_wq, + &c->journal.reclaim_work, 0); + return 0; err: __bch2_fs_read_only(c); - return err; + return ret; +} + +int bch2_fs_read_write(struct bch_fs *c) +{ + return __bch2_fs_read_write(c, false); +} + +int bch2_fs_read_write_early(struct bch_fs *c) +{ + lockdep_assert_held(&c->state_lock); + + if (c->opts.read_only) + return -EROFS; + + return __bch2_fs_read_write(c, true); } /* Filesystem startup/shutdown: */ @@ -638,7 +690,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || !(c->journal_reclaim_wq = alloc_workqueue("bcache_journal", WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || - percpu_ref_init(&c->writes, bch2_writes_disabled, 0, GFP_KERNEL) || + percpu_ref_init(&c->writes, bch2_writes_disabled, + PERCPU_REF_INIT_DEAD, GFP_KERNEL) || mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1, sizeof(struct btree_reserve)) || mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1, @@ -712,7 +765,7 @@ const char *bch2_fs_start(struct bch_fs *c) mutex_lock(&c->state_lock); - BUG_ON(c->state != BCH_FS_STARTING); + BUG_ON(test_bit(BCH_FS_STARTED, &c->flags)); mutex_lock(&c->sb_lock); @@ -746,9 +799,12 @@ const char *bch2_fs_start(struct bch_fs *c) if (c->opts.read_only) { bch2_fs_read_only(c); } else { - err = bch2_fs_read_write(c); - if (err) + if (!test_bit(BCH_FS_RW, &c->flags) + ? bch2_fs_read_write(c) + : bch2_fs_read_write_late(c)) { + err = "error going read write"; goto err; + } } set_bit(BCH_FS_STARTED, &c->flags); diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index 3f730164ca69..91df0d729322 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -217,7 +217,9 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); bool bch2_fs_emergency_read_only(struct bch_fs *); void bch2_fs_read_only(struct bch_fs *); -const char *bch2_fs_read_write(struct bch_fs *); + +int bch2_fs_read_write(struct bch_fs *); +int bch2_fs_read_write_early(struct bch_fs *); void bch2_fs_stop(struct bch_fs *); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 361f7b7addcf..f1e269671374 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -289,7 +289,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf) compressed_sectors_compressed = 0, compressed_sectors_uncompressed = 0; - if (!bch2_fs_running(c)) + if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k) @@ -482,7 +482,7 @@ STORE(__bch2_fs) BCH_DEBUG_PARAMS() #undef BCH_DEBUG_PARAM - if (!bch2_fs_running(c)) + if (!test_bit(BCH_FS_STARTED, &c->flags)) return -EPERM; /* Debugging: */