bcachefs: rework lru btree keys (metadata version lru_v2)

Summary of the change, as visible in the hunks below:

 * lru btree entries used to be KEY_TYPE_lru keys at POS(lru_id, time)
   whose value stored the bucket index. They become KEY_TYPE_set keys at
   lru_pos(lru_id, dev_bucket, time): bpos.inode holds
   (lru_id << LRU_TIME_BITS) | time, bpos.offset holds bucket_to_u64().
 * bch2_lru_set()/bch2_lru_change() take the time by value. The old
   bch2_lru_set() scanned forward for a free slot and wrote the slot it
   found back through *time; the new code updates the exact position.
 * bch2_lru_delete() is replaced by bch2_lru_del(), which no longer reads
   and cross-checks the existing key first.
 * invalidate_one_bucket() receives the bucket position from its caller,
   which now walks lru_pos(dev, 0, 0) .. lru_pos(dev, U64_MAX, LRU_TIME_MAX)
   with for_each_btree_key2_upto().
 * New metadata version lru_v2 (26); bch2_fs_recovery() forces an upgrade
   plus fsck for any older superblock version.

Review changes relative to the submitted revision:

 * recovery.c: the bch_info() text still said "prior to backpointers"
   although the test is now against lru_v2; the message is updated.
 * lru.h: lru_pos_time() masks with LRU_TIME_MAX instead of re-deriving
   the same mask, and the key layout helpers are commented.
 * lru.c: __bch2_lru_set() gets a header comment.
 * The post-image blob ids on the "index" lines of lru.c, lru.h and
   recovery.c are left over from the submitted revision.

diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index f515b038c14e..e81c04bc2327 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -914,13 +914,11 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
 	new_lru = alloc_lru_idx(*new_a);
 
 	if (old_lru != new_lru) {
-		ret = bch2_lru_change(trans, new->k.p.inode, new->k.p.offset,
-				      old_lru, &new_lru, old);
+		ret = bch2_lru_change(trans, new->k.p.inode,
+				      bucket_to_u64(new->k.p),
+				      old_lru, new_lru);
 		if (ret)
 			return ret;
-
-		if (new_a->data_type == BCH_DATA_cached)
-			new_a->io_time[READ] = new_lru;
 	}
 
 	if (old_a->gen != new_a->gen) {
@@ -1510,7 +1508,6 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 	const struct bch_alloc_v4 *a;
 	struct bkey_s_c alloc_k, k;
 	struct printbuf buf = PRINTBUF;
-	struct printbuf buf2 = PRINTBUF;
 	int ret;
 
 	alloc_k = bch2_btree_iter_peek(alloc_iter);
@@ -1527,8 +1524,9 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 		return 0;
 
 	bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru,
-			     POS(alloc_k.k->p.inode, a->io_time[READ]), 0);
-
+			     lru_pos(alloc_k.k->p.inode,
+				     bucket_to_u64(alloc_k.k->p),
+				     a->io_time[READ]), 0);
 	k = bch2_btree_iter_peek_slot(&lru_iter);
 	ret = bkey_err(k);
 	if (ret)
@@ -1539,21 +1537,18 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 			" %s",
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) ||
-	    fsck_err_on(k.k->type != KEY_TYPE_lru ||
-			le64_to_cpu(bkey_s_c_to_lru(k).v->idx) != alloc_k.k->p.offset, c,
-			"incorrect/missing lru entry\n"
-			" %s\n"
+	    fsck_err_on(k.k->type != KEY_TYPE_set, c,
+			"missing lru entry\n"
 			" %s",
 			(printbuf_reset(&buf),
-			 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
-			(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) {
+			 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
 		u64 read_time = a->io_time[READ] ?:
 			atomic64_read(&c->io_clock[READ].now);
 
 		ret = bch2_lru_set(trans,
 				   alloc_k.k->p.inode,
-				   alloc_k.k->p.offset,
-				   &read_time);
+				   bucket_to_u64(alloc_k.k->p),
+				   read_time);
 		if (ret)
 			goto err;
 
@@ -1574,7 +1569,6 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 err:
 fsck_err:
 	bch2_trans_iter_exit(trans, &lru_iter);
-	printbuf_exit(&buf2);
 	printbuf_exit(&buf);
 	return ret;
 }
@@ -1757,51 +1751,34 @@ void bch2_do_discards(struct bch_fs *c)
 }
 
 static int invalidate_one_bucket(struct btree_trans *trans,
-				 struct btree_iter *lru_iter, struct bkey_s_c k,
-				 unsigned dev_idx, s64 *nr_to_invalidate)
+				 struct btree_iter *lru_iter,
+				 struct bpos bucket,
+				 s64 *nr_to_invalidate)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter alloc_iter = { NULL };
 	struct bkey_i_alloc_v4 *a;
-	struct bpos bucket;
 	struct printbuf buf = PRINTBUF;
 	unsigned cached_sectors;
 	int ret = 0;
 
-	if (*nr_to_invalidate <= 0 || k.k->p.inode != dev_idx)
+	if (*nr_to_invalidate <= 0)
 		return 1;
 
-	if (k.k->type != KEY_TYPE_lru) {
-		prt_printf(&buf, "non lru key in lru btree:\n ");
-		bch2_bkey_val_to_text(&buf, c, k);
-
-		if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
-			bch_err(c, "%s", buf.buf);
-		} else {
-			bch2_trans_inconsistent(trans, "%s", buf.buf);
-			ret = -EINVAL;
-		}
-
-		goto out;
-	}
-
-	bucket = POS(dev_idx, le64_to_cpu(bkey_s_c_to_lru(k).v->idx));
-
 	a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
 	ret = PTR_ERR_OR_ZERO(a);
 	if (ret)
 		goto out;
 
-	if (k.k->p.offset != alloc_lru_idx(a->v)) {
+	if (lru_pos_time(lru_iter->pos) != alloc_lru_idx(a->v)) {
 		prt_printf(&buf, "alloc key does not point back to lru entry when invalidating bucket:\n ");
-		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
+		bch2_bpos_to_text(&buf, lru_iter->pos);
 		prt_printf(&buf, "\n ");
-		bch2_bkey_val_to_text(&buf, c, k);
+		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
 
-		if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
-			bch_err(c, "%s", buf.buf);
-		} else {
-			bch2_trans_inconsistent(trans, "%s", buf.buf);
+		bch_err(c, "%s", buf.buf);
+		if (test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
+			bch2_inconsistent_error(c);
 			ret = -EINVAL;
 		}
 
@@ -1852,9 +1829,13 @@ static void bch2_do_invalidates_work(struct work_struct *work)
 		s64 nr_to_invalidate =
 			should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
 
-		ret = for_each_btree_key2(&trans, iter, BTREE_ID_lru,
-				POS(ca->dev_idx, 0), BTREE_ITER_INTENT, k,
-			invalidate_one_bucket(&trans, &iter, k, ca->dev_idx, &nr_to_invalidate));
+		ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_lru,
+				lru_pos(ca->dev_idx, 0, 0),
+				lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
+				BTREE_ITER_INTENT, k,
+			invalidate_one_bucket(&trans, &iter,
+					u64_to_bucket(k.k->p.offset),
+					&nr_to_invalidate));
 
 		if (ret < 0) {
 			percpu_ref_put(&ca->ref);
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 7e67d2e94a29..99f9fbd1401f 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -1562,7 +1562,8 @@ struct bch_sb_field_journal_seq_blacklist {
 	x(backpointers,			22)		\
 	x(inode_v3,			23)		\
 	x(unwritten_extents,		24)		\
-	x(bucket_gens,			25)
+	x(bucket_gens,			25)		\
+	x(lru_v2,			26)
 
 enum bcachefs_metadata_version {
 	bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 293188f47e8a..f40a3ea3f79b 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -186,7 +186,7 @@ static unsigned bch2_key_types_allowed[] = {
 		(1U << KEY_TYPE_snapshot),
 	[BKEY_TYPE_lru] =
 		(1U << KEY_TYPE_deleted)|
-		(1U << KEY_TYPE_lru),
+		(1U << KEY_TYPE_set),
 	[BKEY_TYPE_freespace] =
 		(1U << KEY_TYPE_deleted)|
 		(1U << KEY_TYPE_set),
diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c
index 12821868df71..6f7becb051bc 100644
--- a/fs/bcachefs/lru.c
+++ b/fs/bcachefs/lru.c
@@ -8,6 +8,7 @@
 #include "lru.h"
 #include "recovery.h"
 
+/* KEY_TYPE_lru is obsolete: */
 int bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k,
 		     int rw, struct printbuf *err)
 {
@@ -19,7 +20,7 @@ int bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k,
 		return -BCH_ERR_invalid_bkey;
 	}
 
-	if (!k.k->p.offset) {
+	if (!lru_pos_time(k.k->p)) {
 		prt_printf(err, "lru entry at time=0");
 		return -BCH_ERR_invalid_bkey;
 
@@ -36,101 +37,63 @@ void bch2_lru_to_text(struct printbuf *out, struct bch_fs *c,
 	prt_printf(out, "idx %llu", le64_to_cpu(lru->idx));
 }
 
-int bch2_lru_delete(struct btree_trans *trans, u64 id, u64 idx, u64 time,
-		    struct bkey_s_c orig_k)
+/*
+ * Common helper behind bch2_lru_set() (KEY_TYPE_set) and bch2_lru_del()
+ * (KEY_TYPE_deleted): builds a key of @key_type at
+ * lru_pos(@lru_id, @dev_bucket, @time) and passes it to bch2_trans_update().
+ * Returns 0 without touching the btree when @time is 0.
+ */
+static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
+			  u64 dev_bucket, u64 time, unsigned key_type)
 {
 	struct btree_iter iter;
-	struct bkey_s_c k;
-	u64 existing_idx;
-	struct printbuf buf = PRINTBUF;
+	struct bkey_i *k;
 	int ret = 0;
 
 	if (!time)
 		return 0;
 
+	k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k));
+	ret = PTR_ERR_OR_ZERO(k);
+	if (unlikely(ret))
+		return ret;
+
+	bkey_init(&k->k);
+	k->k.type = key_type;
+	k->k.p = lru_pos(lru_id, dev_bucket, time);
+
+	EBUG_ON(lru_pos_id(k->k.p) != lru_id);
+	EBUG_ON(lru_pos_time(k->k.p) != time);
+	EBUG_ON(k->k.p.offset != dev_bucket);
+
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
-			     POS(id, time),
-			     BTREE_ITER_INTENT|
-			     BTREE_ITER_WITH_UPDATES);
-	k = bch2_btree_iter_peek_slot(&iter);
-	ret = bkey_err(k);
-	if (ret)
-		goto err;
+			     k->k.p, BTREE_ITER_INTENT);
 
-	if (k.k->type != KEY_TYPE_lru) {
-		bch2_bkey_val_to_text(&buf, trans->c, orig_k);
-		bch2_trans_inconsistent(trans,
-			"pointer to nonexistent lru %llu:%llu\n%s",
-			id, time, buf.buf);
-		ret = -EIO;
-		goto err;
-	}
-
-	existing_idx = le64_to_cpu(bkey_s_c_to_lru(k).v->idx);
-	if (existing_idx != idx) {
-		bch2_bkey_val_to_text(&buf, trans->c, orig_k);
-		bch2_trans_inconsistent(trans,
-			"lru %llu:%llu with wrong backpointer: got %llu, should be %llu\n%s",
-			id, time, existing_idx, idx, buf.buf);
-		ret = -EIO;
-		goto err;
-	}
-
-	ret = bch2_btree_delete_at(trans, &iter, 0);
-err:
-	bch2_trans_iter_exit(trans, &iter);
-	printbuf_exit(&buf);
-	return ret;
-}
-
-int bch2_lru_set(struct btree_trans *trans, u64 lru_id, u64 idx, u64 *time)
-{
-	struct btree_iter iter;
-	struct bkey_s_c k;
-	struct bkey_i_lru *lru;
-	int ret = 0;
-
-	if (!*time)
-		return 0;
-
-	for_each_btree_key_norestart(trans, iter, BTREE_ID_lru,
-			POS(lru_id, *time),
-			BTREE_ITER_SLOTS|
-			BTREE_ITER_INTENT|
-			BTREE_ITER_WITH_UPDATES, k, ret)
-		if (bkey_deleted(k.k))
-			break;
-
-	if (ret)
-		goto err;
-
-	BUG_ON(iter.pos.inode != lru_id);
-	*time = iter.pos.offset;
-
-	lru = bch2_bkey_alloc(trans, &iter, lru);
-	ret = PTR_ERR_OR_ZERO(lru);
-	if (ret)
-		goto err;
-
-	lru->v.idx = cpu_to_le64(idx);
-
-	ret = bch2_trans_update(trans, &iter, &lru->k_i, 0);
-	if (ret)
-		goto err;
-err:
+	ret = bch2_btree_iter_traverse(&iter) ?:
+		bch2_trans_update(trans, &iter, k, 0);
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
 }
 
-int bch2_lru_change(struct btree_trans *trans, u64 id, u64 idx,
-		    u64 old_time, u64 *new_time,
-		    struct bkey_s_c k)
+int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
 {
-	if (old_time == *new_time)
+	return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted);
+}
+
+int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
+{
+	return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set);
+}
+
+int bch2_lru_change(struct btree_trans *trans,
+		    u16 lru_id, u64 dev_bucket,
+		    u64 old_time, u64 new_time)
+{
+	if (old_time == new_time)
 		return 0;
 
-	return bch2_lru_delete(trans, id, idx, old_time, k) ?:
-		bch2_lru_set(trans, id, idx, new_time);
+	return bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?:
+		bch2_lru_set(trans, lru_id, dev_bucket, new_time);
 }
 
 static int bch2_check_lru_key(struct btree_trans *trans,
@@ -144,12 +107,9 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 	const struct bch_alloc_v4 *a;
 	struct printbuf buf1 = PRINTBUF;
 	struct printbuf buf2 = PRINTBUF;
-	struct bpos alloc_pos;
+	struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset);
 	int ret;
 
-	alloc_pos = POS(lru_k.k->p.inode,
-			le64_to_cpu(bkey_s_c_to_lru(lru_k).v->idx));
-
 	if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c,
 			"lru key points to nonexistent device:bucket %llu:%llu",
 			alloc_pos.inode, alloc_pos.offset))
@@ -163,10 +123,12 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 
 	a = bch2_alloc_to_v4(k, &a_convert);
 
-	if (fsck_err_on(a->data_type != BCH_DATA_cached ||
-			a->io_time[READ] != lru_k.k->p.offset, c,
-			"incorrect lru entry %s\n"
+	if (fsck_err_on(lru_k.k->type != KEY_TYPE_set ||
+			a->data_type != BCH_DATA_cached ||
+			a->io_time[READ] != lru_pos_time(lru_k.k->p), c,
+			"incorrect lru entry (time %llu) %s\n"
 			" for %s",
+			lru_pos_time(lru_k.k->p),
 			(bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
 			(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) {
 		ret = bch2_btree_delete_at(trans, lru_iter, 0);
diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h
index 925c29b49b86..2e22f139848a 100644
--- a/fs/bcachefs/lru.h
+++ b/fs/bcachefs/lru.h
@@ -2,6 +2,34 @@
 #ifndef _BCACHEFS_LRU_H
 #define _BCACHEFS_LRU_H
 
+/*
+ * lru btree key layout: bpos.inode packs the lru id above the low
+ * LRU_TIME_BITS bits, which hold the time; bpos.offset is the dev_bucket.
+ */
+#define LRU_TIME_BITS	48
+#define LRU_TIME_MAX	((1ULL << LRU_TIME_BITS) - 1)
+
+/* Build the lru btree position for (@lru_id, @dev_bucket, @time). */
+static inline struct bpos lru_pos(u16 lru_id, u64 dev_bucket, u64 time)
+{
+	EBUG_ON(time > LRU_TIME_MAX);
+
+	return POS(((u64) lru_id << LRU_TIME_BITS)|time, dev_bucket);
+}
+
+/* Extract the lru id from a position built by lru_pos(). */
+static inline u64 lru_pos_id(struct bpos pos)
+{
+	return pos.inode >> LRU_TIME_BITS;
+}
+
+/* Extract the time from a position built by lru_pos(). */
+static inline u64 lru_pos_time(struct bpos pos)
+{
+	return pos.inode & LRU_TIME_MAX;
+}
+
 int bch2_lru_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
 void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
@@ -10,9 +38,9 @@ void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 	.val_to_text	= bch2_lru_to_text,	\
 })
 
-int bch2_lru_delete(struct btree_trans *, u64, u64, u64, struct bkey_s_c);
-int bch2_lru_set(struct btree_trans *, u64, u64, u64 *);
-int bch2_lru_change(struct btree_trans *, u64, u64, u64, u64 *, struct bkey_s_c);
+int bch2_lru_del(struct btree_trans *, u16, u64, u64);
+int bch2_lru_set(struct btree_trans *, u16, u64, u64);
+int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
 
 int bch2_check_lrus(struct bch_fs *);
 
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index b10ba8963350..8a78377bf9c5 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -1094,14 +1094,11 @@ int bch2_fs_recovery(struct bch_fs *c)
 	}
 
 	if (!c->opts.nochanges) {
-		if (c->sb.version < bcachefs_metadata_version_backpointers) {
-			bch_info(c, "version prior to backpointers, upgrade and fsck required");
+		if (c->sb.version < bcachefs_metadata_version_lru_v2) {
+			bch_info(c, "version prior to lru_v2, upgrade and fsck required");
 			c->opts.version_upgrade = true;
 			c->opts.fsck = true;
 			c->opts.fix_errors = FSCK_OPT_YES;
-		} else if (c->sb.version < bcachefs_metadata_version_inode_v3) {
-			bch_info(c, "version prior to inode_v3, upgrade required");
-			c->opts.version_upgrade = true;
 		}
 	}
 