bcachefs: data jobs, including rebalance wait for copygc.

move_ratelimit() now has a bool that specifies whether we want to
wait for copygc to finish.

When copygc is running, we're probably low on free buckets; instead
of consuming the remaining buckets, we want to wait for copygc to
finish.

This should help with performance, and with runaway bucket fragmentation.

Signed-off-by: Daniel Hill <daniel@gluo.nz>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Daniel Hill 2022-06-16 02:06:43 +12:00 committed by Kent Overstreet
parent 7f5c5d20f0
commit c91996c50a
5 changed files with 81 additions and 53 deletions

View file

@ -825,6 +825,8 @@ mempool_t bio_bounce_pages;
copygc_heap copygc_heap;
struct write_point copygc_write_point;
s64 copygc_wait;
bool copygc_running;
wait_queue_head_t copygc_running_wq;
/* DATA PROGRESS STATS */
struct list_head data_progress_list;

View file

@ -237,24 +237,72 @@ static int lookup_inode(struct btree_trans *trans, struct bpos pos,
return ret;
}
static int __bch2_move_data(struct bch_fs *c,
struct moving_context *ctxt,
struct bch_ratelimit *rate,
struct write_point_specifier wp,
struct bpos start,
struct bpos end,
move_pred_fn pred, void *arg,
struct bch_move_stats *stats,
enum btree_id btree_id)
/*
 * Throttle the data-move path, optionally waiting for copygc to finish first.
 *
 * @trans:          btree transaction; unlocked before any sleep so we don't
 *                  hold btree locks while blocked
 * @ctxt:           moving context tracking in-flight reads/writes
 * @rate:           optional ratelimiter; NULL means no rate limiting
 * @wait_on_copygc: when true, block until copygc is not running
 *
 * Returns 0 when the caller may proceed with more work, 1 when the
 * kthread has been asked to stop.
 */
static int move_ratelimit(struct btree_trans *trans,
struct moving_context *ctxt,
struct bch_ratelimit *rate,
bool wait_on_copygc)
{
struct bch_fs *c = trans->c;
u64 delay;
/*
 * If copygc is running we're probably low on free buckets: rather than
 * consuming the remaining ones, wait for copygc to finish (killable so
 * a fatal signal or kthread stop still gets us out).
 */
if (wait_on_copygc) {
bch2_trans_unlock(trans);
wait_event_killable(c->copygc_running_wq,
!c->copygc_running ||
kthread_should_stop());
}
do {
delay = rate ? bch2_ratelimit_delay(rate) : 0;
if (delay) {
/* Drop btree locks before sleeping off the ratelimit delay */
bch2_trans_unlock(trans);
set_current_state(TASK_INTERRUPTIBLE);
}
/* PF_KTHREAD check: kthread_should_stop() is only valid on kthreads */
if ((current->flags & PF_KTHREAD) && kthread_should_stop()) {
__set_current_state(TASK_RUNNING);
return 1;
}
if (delay)
schedule_timeout(delay);
if (unlikely(freezing(current))) {
/* Quiesce outstanding reads before freezing for suspend */
move_ctxt_wait_event(ctxt, trans, list_empty(&ctxt->reads));
try_to_freeze();
}
} while (delay);
/*
 * Bound in-flight IO: wait until outstanding write and read sectors
 * drop below the configured limit (>> 9 converts bytes to 512-byte
 * sectors).
 */
move_ctxt_wait_event(ctxt, trans,
atomic_read(&ctxt->write_sectors) <
c->opts.move_bytes_in_flight >> 9);
move_ctxt_wait_event(ctxt, trans,
atomic_read(&ctxt->read_sectors) <
c->opts.move_bytes_in_flight >> 9);
return 0;
}
static int __bch2_move_data(struct bch_fs *c,
struct moving_context *ctxt,
struct bch_ratelimit *rate,
struct write_point_specifier wp,
struct bpos start,
struct bpos end,
move_pred_fn pred, void *arg,
struct bch_move_stats *stats,
enum btree_id btree_id,
bool wait_on_copygc)
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct bkey_buf sk;
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
struct data_update_opts data_opts;
u64 delay, cur_inum = U64_MAX;
u64 cur_inum = U64_MAX;
int ret = 0, ret2;
bch2_bkey_buf_init(&sk);
@ -271,37 +319,7 @@ static int __bch2_move_data(struct bch_fs *c,
if (rate)
bch2_ratelimit_reset(rate);
while (1) {
do {
delay = rate ? bch2_ratelimit_delay(rate) : 0;
if (delay) {
bch2_trans_unlock(&trans);
set_current_state(TASK_INTERRUPTIBLE);
}
if (kthread && (ret = kthread_should_stop())) {
__set_current_state(TASK_RUNNING);
goto out;
}
if (delay)
schedule_timeout(delay);
if (unlikely(freezing(current))) {
move_ctxt_wait_event(ctxt, &trans, list_empty(&ctxt->reads));
try_to_freeze();
}
} while (delay);
move_ctxt_wait_event(ctxt, &trans,
atomic_read(&ctxt->write_sectors) <
c->opts.move_bytes_in_flight >> 9);
move_ctxt_wait_event(ctxt, &trans,
atomic_read(&ctxt->read_sectors) <
c->opts.move_bytes_in_flight >> 9);
while (!move_ratelimit(&trans, ctxt, rate, wait_on_copygc)) {
bch2_trans_begin(&trans);
k = bch2_btree_iter_peek(&iter);
@ -374,7 +392,6 @@ static int __bch2_move_data(struct bch_fs *c,
next_nondata:
bch2_btree_iter_advance(&iter);
}
out:
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
@ -413,7 +430,8 @@ int bch2_move_data(struct bch_fs *c,
struct bch_ratelimit *rate,
struct write_point_specifier wp,
move_pred_fn pred, void *arg,
struct bch_move_stats *stats)
struct bch_move_stats *stats,
bool wait_on_copygc)
{
struct moving_context ctxt = { .stats = stats };
enum btree_id id;
@ -438,7 +456,7 @@ int bch2_move_data(struct bch_fs *c,
ret = __bch2_move_data(c, &ctxt, rate, wp,
id == start_btree_id ? start_pos : POS_MIN,
id == end_btree_id ? end_pos : POS_MAX,
pred, arg, stats, id);
pred, arg, stats, id, wait_on_copygc);
if (ret)
break;
}
@ -675,7 +693,7 @@ int bch2_data_job(struct bch_fs *c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
NULL, writepoint_hashed((unsigned long) current),
rereplicate_pred, c, stats) ?: ret;
rereplicate_pred, c, stats, true) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
break;
case BCH_DATA_OP_MIGRATE:
@ -696,7 +714,7 @@ int bch2_data_job(struct bch_fs *c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
NULL, writepoint_hashed((unsigned long) current),
migrate_pred, &op, stats) ?: ret;
migrate_pred, &op, stats, true) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
break;
case BCH_DATA_OP_REWRITE_OLD_NODES:

View file

@ -35,7 +35,8 @@ int bch2_move_data(struct bch_fs *,
struct bch_ratelimit *,
struct write_point_specifier,
move_pred_fn, void *,
struct bch_move_stats *);
struct bch_move_stats *,
bool);
int bch2_data_job(struct bch_fs *,
struct bch_move_stats *,

View file

@ -316,7 +316,8 @@ static int bch2_copygc(struct bch_fs *c)
NULL,
writepoint_ptr(&c->copygc_write_point),
copygc_pred, NULL,
&move_stats);
&move_stats,
false);
if (ret < 0)
bch_err(c, "error %i from bch2_move_data() in copygc", ret);
if (ret)
@ -381,10 +382,11 @@ static int bch2_copygc_thread(void *arg)
struct bch_fs *c = arg;
struct io_clock *clock = &c->io_clock[WRITE];
u64 last, wait;
int ret = 0;
set_freezable();
while (!kthread_should_stop()) {
while (!ret && !kthread_should_stop()) {
cond_resched();
if (kthread_wait_freezable(c->copy_gc_enabled))
@ -403,8 +405,11 @@ static int bch2_copygc_thread(void *arg)
c->copygc_wait = 0;
if (bch2_copygc(c))
break;
c->copygc_running = true;
ret = bch2_copygc(c);
c->copygc_running = false;
wake_up(&c->copygc_running_wq);
}
return 0;
@ -448,4 +453,6 @@ int bch2_copygc_start(struct bch_fs *c)
void bch2_fs_copygc_init(struct bch_fs *c)
{
init_waitqueue_head(&c->copygc_running_wq);
c->copygc_running = false;
}

View file

@ -255,7 +255,7 @@ static int bch2_rebalance_thread(void *arg)
NULL, /* &r->pd.rate, */
writepoint_ptr(&c->rebalance_write_point),
rebalance_pred, NULL,
&move_stats);
&move_stats, true);
}
return 0;