From c501fef6deb1de13d45d22a3df32906adf17275b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 13 Jun 2022 19:07:19 -0400 Subject: [PATCH] bcachefs: Pull out data_update.c This is the start of reorganizing the data IO paths. The plan is to also break apart io.c into data_read.c and data_write.c, and migrate_write will be renamed to the data_update path. Signed-off-by: Kent Overstreet --- fs/bcachefs/Makefile | 1 + fs/bcachefs/data_update.c | 383 +++++++++++++++++++++++++++++++++++++ fs/bcachefs/data_update.h | 48 +++++ fs/bcachefs/ec.h | 1 + fs/bcachefs/io.c | 9 +- fs/bcachefs/move.c | 393 +------------------------------------- fs/bcachefs/move.h | 53 ++--- 7 files changed, 456 insertions(+), 432 deletions(-) create mode 100644 fs/bcachefs/data_update.c create mode 100644 fs/bcachefs/data_update.h diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index fada601c10db..95b990ad0196 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -25,6 +25,7 @@ bcachefs-y := \ debug.o \ dirent.o \ disk_groups.o \ + data_update.o \ ec.o \ error.o \ extents.o \ diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c new file mode 100644 index 000000000000..0161b0a9f36e --- /dev/null +++ b/fs/bcachefs/data_update.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bcachefs.h" +#include "alloc_foreground.h" +#include "bkey_buf.h" +#include "btree_update.h" +#include "buckets.h" +#include "data_update.h" +#include "ec.h" +#include "extents.h" +#include "io.h" +#include "keylist.h" +#include "move.h" +#include "subvolume.h" +#include "trace.h" + +static int insert_snapshot_whiteouts(struct btree_trans *trans, + enum btree_id id, + struct bpos old_pos, + struct bpos new_pos) +{ + struct bch_fs *c = trans->c; + struct btree_iter iter, update_iter; + struct bkey_s_c k; + struct snapshots_seen s; + int ret; + + if (!btree_type_has_snapshots(id)) + return 0; + + snapshots_seen_init(&s); + + if (!bkey_cmp(old_pos, new_pos)) + return 0; + + if (!snapshot_t(c, old_pos.snapshot)->children[0]) + return 0; + + bch2_trans_iter_init(trans, &iter, id, old_pos, + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_ALL_SNAPSHOTS); + while (1) { +next: + k = bch2_btree_iter_prev(&iter); + ret = bkey_err(k); + if (ret) + break; + + if (bkey_cmp(old_pos, k.k->p)) + break; + + if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) { + struct bkey_i *update; + u32 *i; + + darray_for_each(s.ids, i) + if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, *i)) + goto next; + + update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); + + ret = PTR_ERR_OR_ZERO(update); + if (ret) + break; + + bkey_init(&update->k); + update->k.p = new_pos; + update->k.p.snapshot = k.k->p.snapshot; + + bch2_trans_iter_init(trans, &update_iter, id, update->k.p, + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_ALL_SNAPSHOTS| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&update_iter) ?: + bch2_trans_update(trans, &update_iter, update, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); + bch2_trans_iter_exit(trans, &update_iter); + if (ret) + break; + + ret = snapshots_seen_add(c, &s, k.k->p.snapshot); + if (ret) + break; + } + } + bch2_trans_iter_exit(trans, &iter); + darray_exit(&s.ids); + + return ret; +} + +int bch2_data_update_index_update(struct bch_write_op *op) +{ + struct bch_fs *c = op->c; + struct btree_trans trans; + struct btree_iter iter; + struct data_update *m = + container_of(op, struct data_update, op); + struct open_bucket *ec_ob = ec_open_bucket(c, &op->open_buckets); + struct keylist *keys = 
&op->insert_keys; + struct bkey_buf _new, _insert; + int ret = 0; + + bch2_bkey_buf_init(&_new); + bch2_bkey_buf_init(&_insert); + bch2_bkey_buf_realloc(&_insert, c, U8_MAX); + + bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); + + bch2_trans_iter_init(&trans, &iter, m->btree_id, + bkey_start_pos(&bch2_keylist_front(keys)->k), + BTREE_ITER_SLOTS|BTREE_ITER_INTENT); + + while (1) { + struct bkey_s_c k; + struct bkey_i *insert; + struct bkey_i_extent *new; + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + struct bpos next_pos; + bool did_work = false; + bool extending = false, should_check_enospc; + s64 i_sectors_delta = 0, disk_sectors_delta = 0; + + bch2_trans_begin(&trans); + + k = bch2_btree_iter_peek_slot(&iter); + ret = bkey_err(k); + if (ret) + goto err; + + new = bkey_i_to_extent(bch2_keylist_front(keys)); + + if (bversion_cmp(k.k->version, new->k.version) || + !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset)) + goto nomatch; + + bkey_reassemble(_insert.k, k); + insert = _insert.k; + + bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys)); + new = bkey_i_to_extent(_new.k); + bch2_cut_front(iter.pos, &new->k_i); + + bch2_cut_front(iter.pos, insert); + bch2_cut_back(new->k.p, insert); + bch2_cut_back(insert->k.p, &new->k_i); + + if (m->data_cmd == DATA_REWRITE) { + struct bch_extent_ptr *new_ptr, *old_ptr = (void *) + bch2_bkey_has_device(bkey_i_to_s_c(insert), + m->data_opts.rewrite_dev); + if (!old_ptr) + goto nomatch; + + if (old_ptr->cached) + extent_for_each_ptr(extent_i_to_s(new), new_ptr) + new_ptr->cached = true; + + __bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr); + } + + extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) { + if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) { + /* + * raced with another move op? extent already + * has a pointer to the device we just wrote + * data to + */ + continue; + } + + bch2_extent_ptr_decoded_append(insert, &p); + did_work = true; + } + + if (!did_work) + goto nomatch; + + bch2_bkey_narrow_crcs(insert, + (struct bch_extent_crc_unpacked) { 0 }); + bch2_extent_normalize(c, bkey_i_to_s(insert)); + bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert), + op->opts.background_target, + op->opts.data_replicas); + + ret = bch2_sum_sector_overwrites(&trans, &iter, insert, + &extending, + &should_check_enospc, + &i_sectors_delta, + &disk_sectors_delta); + if (ret) + goto err; + + if (disk_sectors_delta > (s64) op->res.sectors) { + ret = bch2_disk_reservation_add(c, &op->res, + disk_sectors_delta - op->res.sectors, + !should_check_enospc + ? 
BCH_DISK_RESERVATION_NOFAIL : 0); + if (ret) + goto out; + } + + next_pos = insert->k.p; + + ret = insert_snapshot_whiteouts(&trans, m->btree_id, + k.k->p, insert->k.p) ?: + bch2_trans_update(&trans, &iter, insert, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: + bch2_trans_commit(&trans, &op->res, + op_journal_seq(op), + BTREE_INSERT_NOFAIL| + m->data_opts.btree_insert_flags); + if (!ret) { + bch2_btree_iter_set_pos(&iter, next_pos); + atomic_long_inc(&c->extent_migrate_done); + if (ec_ob) + bch2_ob_add_backpointer(c, ec_ob, &insert->k); + } +err: + if (ret == -EINTR) + ret = 0; + if (ret) + break; +next: + while (bkey_cmp(iter.pos, bch2_keylist_front(keys)->k.p) >= 0) { + bch2_keylist_pop_front(keys); + if (bch2_keylist_empty(keys)) + goto out; + } + continue; +nomatch: + if (m->ctxt) { + BUG_ON(k.k->p.offset <= iter.pos.offset); + atomic64_inc(&m->ctxt->stats->keys_raced); + atomic64_add(k.k->p.offset - iter.pos.offset, + &m->ctxt->stats->sectors_raced); + } + atomic_long_inc(&c->extent_migrate_raced); + trace_move_race(&new->k); + bch2_btree_iter_advance(&iter); + goto next; + } +out: + bch2_trans_iter_exit(&trans, &iter); + bch2_trans_exit(&trans); + bch2_bkey_buf_exit(&_insert, c); + bch2_bkey_buf_exit(&_new, c); + BUG_ON(ret == -EINTR); + return ret; +} + +void bch2_data_update_read_done(struct data_update *m, struct bch_read_bio *rbio) +{ + /* write bio must own pages: */ + BUG_ON(!m->op.wbio.bio.bi_vcnt); + + m->ptr = rbio->pick.ptr; + m->offset = rbio->data_pos.offset - rbio->pick.crc.offset; + m->op.devs_have = rbio->devs_have; + m->op.pos = rbio->data_pos; + m->op.version = rbio->version; + m->op.crc = rbio->pick.crc; + m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9; + + if (m->data_cmd == DATA_REWRITE) + bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev); +} + +int bch2_data_update_init(struct bch_fs *c, struct data_update *m, + struct write_point_specifier wp, + struct bch_io_opts io_opts, + enum data_cmd data_cmd, + struct data_opts data_opts, + enum btree_id btree_id, + struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct bch_extent_crc_unpacked crc; + struct extent_ptr_decoded p; + int ret; + + m->btree_id = btree_id; + m->data_cmd = data_cmd; + m->data_opts = data_opts; + m->nr_ptrs_reserved = 0; + + bch2_write_op_init(&m->op, c, io_opts); + + if (!bch2_bkey_is_incompressible(k)) + m->op.compression_type = + bch2_compression_opt_to_type[io_opts.background_compression ?: + io_opts.compression]; + else + m->op.incompressible = true; + + m->op.target = data_opts.target, + m->op.write_point = wp; + + /* + * op->csum_type is normally initialized from the fs/file's current + * options - but if an extent is encrypted, we require that it stays + * encrypted: + */ + bkey_for_each_crc(k.k, ptrs, crc, entry) + if (bch2_csum_type_is_encryption(crc.csum_type)) { + m->op.nonce = crc.nonce + crc.offset; + m->op.csum_type = crc.csum_type; + break; + } + + if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) { + m->op.alloc_reserve = RESERVE_movinggc; + } else { + /* XXX: this should probably be passed in */ + m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; + } + + m->op.flags |= BCH_WRITE_PAGES_STABLE| + BCH_WRITE_PAGES_OWNED| + BCH_WRITE_DATA_ENCODED| + BCH_WRITE_FROM_INTERNAL| + BCH_WRITE_MOVE; + + m->op.nr_replicas = data_opts.nr_replicas; + m->op.nr_replicas_required = data_opts.nr_replicas; + + switch (data_cmd) { + case DATA_ADD_REPLICAS: { + /* + * DATA_ADD_REPLICAS is used for 
moving data to a different + * device in the background, and due to compression the new copy + * might take up more space than the old copy: + */ +#if 0 + int nr = (int) io_opts.data_replicas - + bch2_bkey_nr_ptrs_allocated(k); +#endif + int nr = (int) io_opts.data_replicas; + + if (nr > 0) { + m->op.nr_replicas = m->nr_ptrs_reserved = nr; + + ret = bch2_disk_reservation_get(c, &m->op.res, + k.k->size, m->op.nr_replicas, 0); + if (ret) + return ret; + } + break; + } + case DATA_REWRITE: { + unsigned compressed_sectors = 0; + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) + if (p.ptr.dev == data_opts.rewrite_dev) { + if (p.ptr.cached) + m->op.flags |= BCH_WRITE_CACHED; + + if (!p.ptr.cached && + crc_is_compressed(p.crc)) + compressed_sectors += p.crc.compressed_size; + } + + if (compressed_sectors) { + ret = bch2_disk_reservation_add(c, &m->op.res, + k.k->size * m->op.nr_replicas, + BCH_DISK_RESERVATION_NOFAIL); + if (ret) + return ret; + } + break; + } + case DATA_PROMOTE: + m->op.flags |= BCH_WRITE_ALLOC_NOWAIT; + m->op.flags |= BCH_WRITE_CACHED; + break; + default: + BUG(); + } + + return 0; +} diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h new file mode 100644 index 000000000000..03b4ca5a4ee8 --- /dev/null +++ b/fs/bcachefs/data_update.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BCACHEFS_DATA_UPDATE_H +#define _BCACHEFS_DATA_UPDATE_H + +#include "io_types.h" + +enum data_cmd { + DATA_SKIP, + DATA_SCRUB, + DATA_ADD_REPLICAS, + DATA_REWRITE, + DATA_PROMOTE, +}; + +struct data_opts { + u16 target; + u8 rewrite_dev; + u8 nr_replicas; + int btree_insert_flags; +}; + +struct data_update { + enum btree_id btree_id; + enum data_cmd data_cmd; + struct data_opts data_opts; + + unsigned nr_ptrs_reserved; + + struct moving_context *ctxt; + + /* what we read: */ + struct bch_extent_ptr ptr; + u64 offset; + + struct bch_write_op op; +}; + +int bch2_data_update_index_update(struct bch_write_op *); + +void bch2_data_update_read_done(struct data_update *, struct bch_read_bio *); +int bch2_data_update_init(struct bch_fs *, struct data_update *, + struct write_point_specifier, + struct bch_io_opts, + enum data_cmd, struct data_opts, + enum btree_id, struct bkey_s_c); + +#endif /* _BCACHEFS_DATA_UPDATE_H */ diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index a4c13d61af10..c53187df4651 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -4,6 +4,7 @@ #include "ec_types.h" #include "buckets_types.h" +#include "extents_types.h" #include "keylist_types.h" int bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c, diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index ca72a31da502..743449ed7fae 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -16,6 +16,7 @@ #include "checksum.h" #include "compress.h" #include "clock.h" +#include "data_update.h" #include "debug.h" #include "disk_groups.h" #include "ec.h" @@ -660,7 +661,7 @@ static void __bch2_write_index(struct bch_write_op *op) ret = !(op->flags & BCH_WRITE_MOVE) ? 
bch2_write_index_default(op) - : bch2_migrate_index_update(op); + : bch2_data_update_index_update(op); BUG_ON(ret == -EINTR); BUG_ON(keylist_sectors(keys) && !ret); @@ -1433,7 +1434,7 @@ struct promote_op { struct rhash_head hash; struct bpos pos; - struct migrate_write write; + struct data_update write; struct bio_vec bi_inline_vecs[0]; /* must be last */ }; @@ -1508,7 +1509,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio) sizeof(struct bio_vec) * rbio->bio.bi_vcnt); swap(bio->bi_vcnt, rbio->bio.bi_vcnt); - bch2_migrate_read_done(&op->write, rbio); + bch2_data_update_read_done(&op->write, rbio); closure_call(&op->write.op.cl, bch2_write, c->btree_update_wq, NULL); } @@ -1565,7 +1566,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c, bio = &op->write.op.wbio.bio; bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0); - ret = bch2_migrate_write_init(c, &op->write, + ret = bch2_data_update_init(c, &op->write, writepoint_hashed((unsigned long) current), opts, DATA_PROMOTE, diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 08fb8c71893f..a3a486cff28e 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -6,7 +6,6 @@ #include "btree_gc.h" #include "btree_update.h" #include "btree_update_interior.h" -#include "buckets.h" #include "disk_groups.h" #include "ec.h" #include "inode.h" @@ -15,7 +14,6 @@ #include "keylist.h" #include "move.h" #include "replicas.h" -#include "subvolume.h" #include "super-io.h" #include "trace.h" @@ -32,394 +30,11 @@ struct moving_io { struct bch_read_bio rbio; - struct migrate_write write; + struct data_update write; /* Must be last since it is variable size */ struct bio_vec bi_inline_vecs[0]; }; -struct moving_context { - /* Closure for waiting on all reads and writes to complete */ - struct closure cl; - - struct bch_move_stats *stats; - - struct list_head reads; - - /* in flight sectors: */ - atomic_t read_sectors; - atomic_t write_sectors; - - wait_queue_head_t wait; -}; - -static int insert_snapshot_whiteouts(struct btree_trans *trans, - enum btree_id id, - struct bpos old_pos, - struct bpos new_pos) -{ - struct bch_fs *c = trans->c; - struct btree_iter iter, update_iter; - struct bkey_s_c k; - struct snapshots_seen s; - int ret; - - if (!btree_type_has_snapshots(id)) - return 0; - - snapshots_seen_init(&s); - - if (!bkey_cmp(old_pos, new_pos)) - return 0; - - if (!snapshot_t(c, old_pos.snapshot)->children[0]) - return 0; - - bch2_trans_iter_init(trans, &iter, id, old_pos, - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_ALL_SNAPSHOTS); - while (1) { -next: - k = bch2_btree_iter_prev(&iter); - ret = bkey_err(k); - if (ret) - break; - - if (bkey_cmp(old_pos, k.k->p)) - break; - - if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot)) { - struct bkey_i *update; - u32 *i; - - darray_for_each(s.ids, i) - if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, *i)) - goto next; - - update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i)); - - ret = PTR_ERR_OR_ZERO(update); - if (ret) - break; - - bkey_init(&update->k); - update->k.p = new_pos; - update->k.p.snapshot = k.k->p.snapshot; - - bch2_trans_iter_init(trans, &update_iter, id, update->k.p, - BTREE_ITER_NOT_EXTENTS| - BTREE_ITER_ALL_SNAPSHOTS| - BTREE_ITER_INTENT); - ret = bch2_btree_iter_traverse(&update_iter) ?: - bch2_trans_update(trans, &update_iter, update, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); - bch2_trans_iter_exit(trans, &update_iter); - if (ret) - break; - - ret = snapshots_seen_add(c, &s, k.k->p.snapshot); - if (ret) - break; - } - } - 
bch2_trans_iter_exit(trans, &iter); - darray_exit(&s.ids); - - return ret; -} - -int bch2_migrate_index_update(struct bch_write_op *op) -{ - struct bch_fs *c = op->c; - struct btree_trans trans; - struct btree_iter iter; - struct migrate_write *m = - container_of(op, struct migrate_write, op); - struct open_bucket *ec_ob = ec_open_bucket(c, &op->open_buckets); - struct keylist *keys = &op->insert_keys; - struct bkey_buf _new, _insert; - int ret = 0; - - bch2_bkey_buf_init(&_new); - bch2_bkey_buf_init(&_insert); - bch2_bkey_buf_realloc(&_insert, c, U8_MAX); - - bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024); - - bch2_trans_iter_init(&trans, &iter, m->btree_id, - bkey_start_pos(&bch2_keylist_front(keys)->k), - BTREE_ITER_SLOTS|BTREE_ITER_INTENT); - - while (1) { - struct bkey_s_c k; - struct bkey_i *insert; - struct bkey_i_extent *new; - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - struct bpos next_pos; - bool did_work = false; - bool extending = false, should_check_enospc; - s64 i_sectors_delta = 0, disk_sectors_delta = 0; - - bch2_trans_begin(&trans); - - k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - goto err; - - new = bkey_i_to_extent(bch2_keylist_front(keys)); - - if (bversion_cmp(k.k->version, new->k.version) || - !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset)) - goto nomatch; - - bkey_reassemble(_insert.k, k); - insert = _insert.k; - - bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys)); - new = bkey_i_to_extent(_new.k); - bch2_cut_front(iter.pos, &new->k_i); - - bch2_cut_front(iter.pos, insert); - bch2_cut_back(new->k.p, insert); - bch2_cut_back(insert->k.p, &new->k_i); - - if (m->data_cmd == DATA_REWRITE) { - struct bch_extent_ptr *new_ptr, *old_ptr = (void *) - bch2_bkey_has_device(bkey_i_to_s_c(insert), - m->data_opts.rewrite_dev); - if (!old_ptr) - goto nomatch; - - if (old_ptr->cached) - extent_for_each_ptr(extent_i_to_s(new), new_ptr) - new_ptr->cached = true; - - __bch2_bkey_drop_ptr(bkey_i_to_s(insert), old_ptr); - } - - extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) { - if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) { - /* - * raced with another move op? extent already - * has a pointer to the device we just wrote - * data to - */ - continue; - } - - bch2_extent_ptr_decoded_append(insert, &p); - did_work = true; - } - - if (!did_work) - goto nomatch; - - bch2_bkey_narrow_crcs(insert, - (struct bch_extent_crc_unpacked) { 0 }); - bch2_extent_normalize(c, bkey_i_to_s(insert)); - bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert), - op->opts.background_target, - op->opts.data_replicas); - - ret = bch2_sum_sector_overwrites(&trans, &iter, insert, - &extending, - &should_check_enospc, - &i_sectors_delta, - &disk_sectors_delta); - if (ret) - goto err; - - if (disk_sectors_delta > (s64) op->res.sectors) { - ret = bch2_disk_reservation_add(c, &op->res, - disk_sectors_delta - op->res.sectors, - !should_check_enospc - ? 
BCH_DISK_RESERVATION_NOFAIL : 0); - if (ret) - goto out; - } - - next_pos = insert->k.p; - - ret = insert_snapshot_whiteouts(&trans, m->btree_id, - k.k->p, insert->k.p) ?: - bch2_trans_update(&trans, &iter, insert, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: - bch2_trans_commit(&trans, &op->res, - op_journal_seq(op), - BTREE_INSERT_NOFAIL| - m->data_opts.btree_insert_flags); - if (!ret) { - bch2_btree_iter_set_pos(&iter, next_pos); - atomic_long_inc(&c->extent_migrate_done); - if (ec_ob) - bch2_ob_add_backpointer(c, ec_ob, &insert->k); - } -err: - if (ret == -EINTR) - ret = 0; - if (ret) - break; -next: - while (bkey_cmp(iter.pos, bch2_keylist_front(keys)->k.p) >= 0) { - bch2_keylist_pop_front(keys); - if (bch2_keylist_empty(keys)) - goto out; - } - continue; -nomatch: - if (m->ctxt) { - BUG_ON(k.k->p.offset <= iter.pos.offset); - atomic64_inc(&m->ctxt->stats->keys_raced); - atomic64_add(k.k->p.offset - iter.pos.offset, - &m->ctxt->stats->sectors_raced); - } - atomic_long_inc(&c->extent_migrate_raced); - trace_move_race(&new->k); - bch2_btree_iter_advance(&iter); - goto next; - } -out: - bch2_trans_iter_exit(&trans, &iter); - bch2_trans_exit(&trans); - bch2_bkey_buf_exit(&_insert, c); - bch2_bkey_buf_exit(&_new, c); - BUG_ON(ret == -EINTR); - return ret; -} - -void bch2_migrate_read_done(struct migrate_write *m, struct bch_read_bio *rbio) -{ - /* write bio must own pages: */ - BUG_ON(!m->op.wbio.bio.bi_vcnt); - - m->ptr = rbio->pick.ptr; - m->offset = rbio->data_pos.offset - rbio->pick.crc.offset; - m->op.devs_have = rbio->devs_have; - m->op.pos = rbio->data_pos; - m->op.version = rbio->version; - m->op.crc = rbio->pick.crc; - m->op.wbio.bio.bi_iter.bi_size = m->op.crc.compressed_size << 9; - - if (m->data_cmd == DATA_REWRITE) - bch2_dev_list_drop_dev(&m->op.devs_have, m->data_opts.rewrite_dev); -} - -int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, - struct write_point_specifier wp, - struct bch_io_opts io_opts, - enum data_cmd data_cmd, - struct data_opts data_opts, - enum btree_id btree_id, - struct bkey_s_c k) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const union bch_extent_entry *entry; - struct bch_extent_crc_unpacked crc; - struct extent_ptr_decoded p; - int ret; - - m->btree_id = btree_id; - m->data_cmd = data_cmd; - m->data_opts = data_opts; - m->nr_ptrs_reserved = 0; - - bch2_write_op_init(&m->op, c, io_opts); - - if (!bch2_bkey_is_incompressible(k)) - m->op.compression_type = - bch2_compression_opt_to_type[io_opts.background_compression ?: - io_opts.compression]; - else - m->op.incompressible = true; - - m->op.target = data_opts.target, - m->op.write_point = wp; - - /* - * op->csum_type is normally initialized from the fs/file's current - * options - but if an extent is encrypted, we require that it stays - * encrypted: - */ - bkey_for_each_crc(k.k, ptrs, crc, entry) - if (bch2_csum_type_is_encryption(crc.csum_type)) { - m->op.nonce = crc.nonce + crc.offset; - m->op.csum_type = crc.csum_type; - break; - } - - if (m->data_opts.btree_insert_flags & BTREE_INSERT_USE_RESERVE) { - m->op.alloc_reserve = RESERVE_movinggc; - } else { - /* XXX: this should probably be passed in */ - m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS; - } - - m->op.flags |= BCH_WRITE_PAGES_STABLE| - BCH_WRITE_PAGES_OWNED| - BCH_WRITE_DATA_ENCODED| - BCH_WRITE_FROM_INTERNAL| - BCH_WRITE_MOVE; - - m->op.nr_replicas = data_opts.nr_replicas; - m->op.nr_replicas_required = data_opts.nr_replicas; - - switch (data_cmd) { - case DATA_ADD_REPLICAS: { - /* - * DATA_ADD_REPLICAS is used for 
moving data to a different - * device in the background, and due to compression the new copy - * might take up more space than the old copy: - */ -#if 0 - int nr = (int) io_opts.data_replicas - - bch2_bkey_nr_ptrs_allocated(k); -#endif - int nr = (int) io_opts.data_replicas; - - if (nr > 0) { - m->op.nr_replicas = m->nr_ptrs_reserved = nr; - - ret = bch2_disk_reservation_get(c, &m->op.res, - k.k->size, m->op.nr_replicas, 0); - if (ret) - return ret; - } - break; - } - case DATA_REWRITE: { - unsigned compressed_sectors = 0; - - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) - if (p.ptr.dev == data_opts.rewrite_dev) { - if (p.ptr.cached) - m->op.flags |= BCH_WRITE_CACHED; - - if (!p.ptr.cached && - crc_is_compressed(p.crc)) - compressed_sectors += p.crc.compressed_size; - } - - if (compressed_sectors) { - ret = bch2_disk_reservation_add(c, &m->op.res, - k.k->size * m->op.nr_replicas, - BCH_DISK_RESERVATION_NOFAIL); - if (ret) - return ret; - } - break; - } - case DATA_PROMOTE: - m->op.flags |= BCH_WRITE_ALLOC_NOWAIT; - m->op.flags |= BCH_WRITE_CACHED; - break; - default: - BUG(); - } - - return 0; -} - static void move_free(struct moving_io *io) { struct moving_context *ctxt = io->write.ctxt; @@ -457,7 +72,7 @@ static void move_write(struct moving_io *io) closure_get(&io->write.ctxt->cl); atomic_add(io->write_sectors, &io->write.ctxt->write_sectors); - bch2_migrate_read_done(&io->write, &io->rbio); + bch2_data_update_read_done(&io->write, &io->rbio); closure_call(&io->write.op.cl, bch2_write, NULL, NULL); } @@ -564,8 +179,8 @@ static int bch2_move_extent(struct btree_trans *trans, io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(k.k); io->rbio.bio.bi_end_io = move_read_endio; - ret = bch2_migrate_write_init(c, &io->write, wp, io_opts, - data_cmd, data_opts, btree_id, k); + ret = bch2_data_update_init(c, &io->write, wp, io_opts, + data_cmd, data_opts, btree_id, k); if (ret) goto err_free_pages; diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 901d8f875946..6d273f67a82c 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -4,51 +4,26 @@ #include "btree_iter.h" #include "buckets.h" -#include "io_types.h" +#include "data_update.h" #include "move_types.h" struct bch_read_bio; -struct moving_context; -enum data_cmd { - DATA_SKIP, - DATA_SCRUB, - DATA_ADD_REPLICAS, - DATA_REWRITE, - DATA_PROMOTE, +struct moving_context { + /* Closure for waiting on all reads and writes to complete */ + struct closure cl; + + struct bch_move_stats *stats; + + struct list_head reads; + + /* in flight sectors: */ + atomic_t read_sectors; + atomic_t write_sectors; + + wait_queue_head_t wait; }; -struct data_opts { - u16 target; - u8 rewrite_dev; - u8 nr_replicas; - int btree_insert_flags; -}; - -struct migrate_write { - enum btree_id btree_id; - enum data_cmd data_cmd; - struct data_opts data_opts; - - unsigned nr_ptrs_reserved; - - struct moving_context *ctxt; - - /* what we read: */ - struct bch_extent_ptr ptr; - u64 offset; - - struct bch_write_op op; -}; - -int bch2_migrate_index_update(struct bch_write_op *); -void bch2_migrate_read_done(struct migrate_write *, struct bch_read_bio *); -int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *, - struct write_point_specifier, - struct bch_io_opts, - enum data_cmd, struct data_opts, - enum btree_id, struct bkey_s_c); - typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c, struct bch_io_opts *, struct data_opts *);
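
Reviewer note, not part of the applied diff: below is a minimal sketch of how a
consumer is expected to drive the new data_update interface, mirroring the
sequence in bch2_move_extent()/move_write() above. The helper name and the
elided read issuance are hypothetical; the calls and signatures are the ones
added by this patch.

	#include "bcachefs.h"
	#include "data_update.h"
	#include "io.h"

	/*
	 * Hypothetical helper, for illustration only: initialize the write
	 * half from the extent being moved, issue the read, then hand the
	 * completed read to the write path.  Because bch2_data_update_init()
	 * sets BCH_WRITE_MOVE, __bch2_write_index() will call
	 * bch2_data_update_index_update() when the write completes.
	 */
	static int data_update_sketch(struct bch_fs *c, struct data_update *u,
				      struct bch_read_bio *rbio,
				      struct bch_io_opts io_opts,
				      struct data_opts data_opts,
				      enum btree_id btree_id, struct bkey_s_c k)
	{
		int ret;

		/* 1) Configure the write from the key we intend to rewrite: */
		ret = bch2_data_update_init(c, u,
				writepoint_hashed((unsigned long) current),
				io_opts, DATA_REWRITE, data_opts, btree_id, k);
		if (ret)
			return ret;

		/*
		 * 2) ...allocate pages owned by the write bio and issue the
		 * read into rbio; bch2_data_update_read_done() BUG()s if the
		 * write bio does not own its pages...
		 */

		/* 3) Propagate pointer/crc/position from what was read: */
		bch2_data_update_read_done(u, rbio);

		/* 4) Kick off the write; the index update runs from the write path: */
		closure_call(&u->op.cl, bch2_write, NULL, NULL);
		return 0;
	}

This is the same shape the promote path in io.c now uses: bch2_data_update_init()
with DATA_PROMOTE from __promote_alloc(), then bch2_data_update_read_done()
followed by closure_call() from promote_start().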