diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 8f0c1f378b77..078968f30175 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1409,7 +1409,7 @@ int bch2_dev_allocator_start(struct bch_dev *ca) return 0; p = kthread_create(bch2_allocator_thread, ca, - "bch_alloc[%s]", ca->name); + "bch-alloc/%s", ca->name); if (IS_ERR(p)) return PTR_ERR(p); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index d77d1fc1cfed..4fe3f9257752 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -650,7 +650,6 @@ struct bch_fs { struct workqueue_struct *wq; /* copygc needs its own workqueue for index updates.. */ struct workqueue_struct *copygc_wq; - struct workqueue_struct *journal_reclaim_wq; /* ALLOCATION */ struct delayed_work pd_controllers_update; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index da0ad8f50775..df018a2e463e 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1427,7 +1427,7 @@ int bch2_gc_thread_start(struct bch_fs *c) BUG_ON(c->gc_thread); - p = kthread_create(bch2_gc_thread, c, "bch_gc"); + p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name); if (IS_ERR(p)) return PTR_ERR(p); diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 99e03852b814..d1f226e66158 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -497,7 +497,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans, &ck->journal, btree_key_cache_journal_flush); if (kick_reclaim) - mod_delayed_work(c->journal_reclaim_wq, &c->journal.reclaim_work, 0); + journal_reclaim_kick(&c->journal); return true; } diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index cd5c850a41ec..7c77fd09c834 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -341,7 +341,8 @@ static long bch2_ioctl_data(struct bch_fs *c, ctx->c = c; ctx->arg = arg; - ctx->thread = kthread_create(bch2_data_thread, ctx, 
"[bcachefs]"); + ctx->thread = kthread_create(bch2_data_thread, ctx, + "bch-data/%s", c->name); if (IS_ERR(ctx->thread)) { ret = PTR_ERR(ctx->thread); goto err; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index bb4353e673e7..2c6aa36cc025 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -225,11 +225,14 @@ static bool journal_entry_close(struct journal *j) */ static int journal_entry_open(struct journal *j) { + struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_buf *buf = journal_cur_buf(j); union journal_res_state old, new; int u64s; u64 v; + BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); + lockdep_assert_held(&j->lock); BUG_ON(journal_entry_is_open(j)); @@ -480,8 +483,10 @@ static bool journal_preres_available(struct journal *j, { bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags); - if (!ret) - bch2_journal_reclaim_work(&j->reclaim_work.work); + if (!ret && mutex_trylock(&j->reclaim_lock)) { + bch2_journal_reclaim(j); + mutex_unlock(&j->reclaim_lock); + } return ret; } @@ -888,7 +893,7 @@ void bch2_fs_journal_stop(struct journal *j) j->last_empty_seq + 1 != journal_cur_seq(j))); cancel_delayed_work_sync(&j->write_work); - cancel_delayed_work_sync(&j->reclaim_work); + bch2_journal_reclaim_stop(j); } int bch2_fs_journal_start(struct journal *j, u64 cur_seq, @@ -1019,7 +1024,6 @@ int bch2_fs_journal_init(struct journal *j) spin_lock_init(&j->err_lock); init_waitqueue_head(&j->wait); INIT_DELAYED_WORK(&j->write_work, journal_write_work); - INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work); init_waitqueue_head(&j->pin_flush_wait); mutex_init(&j->reclaim_lock); mutex_init(&j->discard_lock); @@ -1071,6 +1075,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) "last_seq:\t\t%llu\n" "last_seq_ondisk:\t%llu\n" "prereserved:\t\t%u/%u\n" + "nr direct reclaim:\t%llu\n" + "nr background reclaim:\t%llu\n" "current entry sectors:\t%u\n" "current entry error:\t%u\n" "current 
entry:\t\t", @@ -1080,6 +1086,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) j->last_seq_ondisk, j->prereserved.reserved, j->prereserved.remaining, + j->nr_direct_reclaim, + j->nr_background_reclaim, j->cur_entry_sectors, j->cur_entry_error); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 354d57a3cd59..79d5d892728f 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -993,7 +993,7 @@ static void journal_write_done(struct closure *cl) * Must come before signaling write completion, for * bch2_fs_journal_stop(): */ - mod_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0); + journal_reclaim_kick(&c->journal); /* also must come before signalling write completion: */ closure_debug_destroy(cl); @@ -1044,6 +1044,8 @@ void bch2_journal_write(struct closure *cl) unsigned i, sectors, bytes, u64s; int ret; + BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); + bch2_journal_pin_put(j, le64_to_cpu(w->data->seq)); journal_buf_realloc(j, w); diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 9c67597d1ec6..9f0d2e6aa4e3 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -9,6 +9,7 @@ #include "super.h" #include "trace.h" +#include <linux/kthread.h> #include <linux/sched/mm.h> /* Free space calculations: */ @@ -534,9 +535,10 @@ static u64 journal_seq_to_flush(struct journal *j) * 512 journal entries or 25% of all journal buckets, then * journal_next_bucket() should not stall. 
*/ -void bch2_journal_reclaim(struct journal *j) +static void __bch2_journal_reclaim(struct journal *j, bool direct) { struct bch_fs *c = container_of(j, struct bch_fs, journal); + bool kthread = (current->flags & PF_KTHREAD) != 0; u64 seq_to_flush, nr_flushed = 0; size_t min_nr; unsigned flags; @@ -551,6 +553,9 @@ void bch2_journal_reclaim(struct journal *j) flags = memalloc_noreclaim_save(); do { + if (kthread && kthread_should_stop()) + break; + bch2_journal_do_discards(j); seq_to_flush = journal_seq_to_flush(j); @@ -582,26 +587,83 @@ void bch2_journal_reclaim(struct journal *j) c->btree_key_cache.nr_dirty, c->btree_key_cache.nr_keys); - nr_flushed += journal_flush_pins(j, seq_to_flush, min_nr); + nr_flushed = journal_flush_pins(j, seq_to_flush, min_nr); + + if (direct) + j->nr_direct_reclaim += nr_flushed; + else + j->nr_background_reclaim += nr_flushed; + trace_journal_reclaim_finish(c, nr_flushed); } while (min_nr); memalloc_noreclaim_restore(flags); - - trace_journal_reclaim_finish(c, nr_flushed); - - if (!bch2_journal_error(j)) - queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, - msecs_to_jiffies(j->reclaim_delay_ms)); } -void bch2_journal_reclaim_work(struct work_struct *work) +void bch2_journal_reclaim(struct journal *j) { - struct journal *j = container_of(to_delayed_work(work), - struct journal, reclaim_work); + __bch2_journal_reclaim(j, true); +} - mutex_lock(&j->reclaim_lock); - bch2_journal_reclaim(j); - mutex_unlock(&j->reclaim_lock); +static int bch2_journal_reclaim_thread(void *arg) +{ + struct journal *j = arg; + unsigned long next; + + while (!kthread_should_stop()) { + j->reclaim_kicked = false; + + mutex_lock(&j->reclaim_lock); + __bch2_journal_reclaim(j, false); + mutex_unlock(&j->reclaim_lock); + + next = j->last_flushed + msecs_to_jiffies(j->reclaim_delay_ms); + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + if (j->reclaim_kicked) + break; + if (time_after_eq(jiffies, next)) + 
break; + schedule_timeout(next - jiffies); + + } + __set_current_state(TASK_RUNNING); + } + + return 0; +} + +void bch2_journal_reclaim_stop(struct journal *j) +{ + struct task_struct *p = j->reclaim_thread; + + j->reclaim_thread = NULL; + + if (p) { + kthread_stop(p); + put_task_struct(p); + } +} + +int bch2_journal_reclaim_start(struct journal *j) +{ + struct bch_fs *c = container_of(j, struct bch_fs, journal); + struct task_struct *p; + + if (j->reclaim_thread) + return 0; + + p = kthread_create(bch2_journal_reclaim_thread, j, + "bch-reclaim/%s", c->name); + if (IS_ERR(p)) + return PTR_ERR(p); + + get_task_struct(p); + j->reclaim_thread = p; + wake_up_process(p); + return 0; } static int journal_flush_done(struct journal *j, u64 seq_to_flush, diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h index 8128907a7623..bae2c9210db8 100644 --- a/fs/bcachefs/journal_reclaim.h +++ b/fs/bcachefs/journal_reclaim.h @@ -10,6 +10,17 @@ enum journal_space_from { journal_space_clean, }; +static inline void journal_reclaim_kick(struct journal *j) +{ + struct task_struct *p = READ_ONCE(j->reclaim_thread); + + if (p && !j->reclaim_kicked) { + j->reclaim_kicked = true; + if (p) + wake_up_process(p); + } +} + unsigned bch2_journal_dev_buckets_available(struct journal *, struct journal_device *, enum journal_space_from); @@ -55,7 +66,9 @@ void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *); void bch2_journal_do_discards(struct journal *); void bch2_journal_reclaim(struct journal *); -void bch2_journal_reclaim_work(struct work_struct *); + +void bch2_journal_reclaim_stop(struct journal *); +int bch2_journal_reclaim_start(struct journal *); bool bch2_journal_flush_pins(struct journal *, u64); diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 5f20653b8eb5..6312a7f06d87 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -216,8 +216,12 @@ struct journal { struct write_point wp; spinlock_t 
err_lock; - struct delayed_work reclaim_work; struct mutex reclaim_lock; + struct task_struct *reclaim_thread; + bool reclaim_kicked; + u64 nr_direct_reclaim; + u64 nr_background_reclaim; + unsigned long last_flushed; struct journal_entry_pin *flush_in_progress; wait_queue_head_t pin_flush_wait; diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index e858e2a35f8d..a9775cc84f66 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -345,7 +345,7 @@ int bch2_copygc_start(struct bch_fs *c) if (bch2_fs_init_fault("copygc_start")) return -ENOMEM; - t = kthread_create(bch2_copygc_thread, c, "bch_copygc"); + t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name); if (IS_ERR(t)) return PTR_ERR(t); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index cce6f58fe609..f9a12dd797a5 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -314,7 +314,7 @@ int bch2_rebalance_start(struct bch_fs *c) if (c->opts.nochanges) return 0; - p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance"); + p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); if (IS_ERR(p)) return PTR_ERR(p); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 12ce4a627746..98a875e08e9a 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include @@ -266,7 +265,7 @@ static void bch2_writes_disabled(struct percpu_ref *writes) void bch2_fs_read_only(struct bch_fs *c) { if (!test_bit(BCH_FS_RW, &c->flags)) { - cancel_delayed_work_sync(&c->journal.reclaim_work); + BUG_ON(c->journal.reclaim_thread); return; } @@ -424,6 +423,12 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); + ret = bch2_journal_reclaim_start(&c->journal); + if (ret) { + bch_err(c, "error starting journal reclaim: %i", ret); + return ret; + } + if (!early) { ret = bch2_fs_read_write_late(c); if (ret) @@ -432,9 
+437,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) percpu_ref_reinit(&c->writes); set_bit(BCH_FS_RW, &c->flags); - - queue_delayed_work(c->journal_reclaim_wq, - &c->journal.reclaim_work, 0); return 0; err: __bch2_fs_read_only(c); @@ -503,8 +505,6 @@ static void __bch2_fs_free(struct bch_fs *c) kfree(c->unused_inode_hints); free_heap(&c->copygc_heap); - if (c->journal_reclaim_wq) - destroy_workqueue(c->journal_reclaim_wq); if (c->copygc_wq) destroy_workqueue(c->copygc_wq); if (c->wq) @@ -758,8 +758,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) || !(c->copygc_wq = alloc_workqueue("bcachefs_copygc", WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || - !(c->journal_reclaim_wq = alloc_workqueue("bcachefs_journal_reclaim", - WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) || percpu_ref_init(&c->writes, bch2_writes_disabled, PERCPU_REF_INIT_DEAD, GFP_KERNEL) || mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||