diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile index 45b64f89258c..b81268418174 100644 --- a/fs/bcachefs/Makefile +++ b/fs/bcachefs/Makefile @@ -28,6 +28,7 @@ bcachefs-y := \ clock.o \ compress.o \ counters.o \ + darray.o \ debug.o \ dirent.o \ disk_groups.o \ @@ -70,6 +71,7 @@ bcachefs-y := \ reflink.o \ replicas.o \ sb-clean.o \ + sb-downgrade.o \ sb-errors.o \ sb-members.o \ siphash.o \ diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index f3809897f00a..3640f417cce1 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -366,7 +366,8 @@ int bch2_set_acl(struct mnt_idmap *idmap, bch2_trans_begin(trans); acl = _acl; - ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode), + ret = bch2_subvol_is_ro_trans(trans, inode->ei_subvol) ?: + bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode), BTREE_ITER_INTENT); if (ret) goto btree_err; diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index dfa22f9d9a1d..b62737fdf5ab 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -737,6 +737,7 @@ struct bch_fs { unsigned nsec_per_time_unit; u64 features; u64 compat; + unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)]; } sb; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 1ab1f08d763b..fe78e87603fc 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1207,19 +1207,21 @@ struct bch_sb_field { }; #define BCH_SB_FIELDS() \ - x(journal, 0) \ - x(members_v1, 1) \ - x(crypt, 2) \ - x(replicas_v0, 3) \ - x(quota, 4) \ - x(disk_groups, 5) \ - x(clean, 6) \ - x(replicas, 7) \ - x(journal_seq_blacklist, 8) \ - x(journal_v2, 9) \ - x(counters, 10) \ - x(members_v2, 11) \ - x(errors, 12) + x(journal, 0) \ + x(members_v1, 1) \ + x(crypt, 2) \ + x(replicas_v0, 3) \ + x(quota, 4) \ + x(disk_groups, 5) \ + x(clean, 6) \ + x(replicas, 7) \ + x(journal_seq_blacklist, 8) \ + x(journal_v2, 9) \ + x(counters, 10) \ + x(members_v2, 11) \ + x(errors, 12) \ + x(ext, 
13) \ + x(downgrade, 14) enum bch_sb_field_type { #define x(f, nr) BCH_SB_FIELD_##f = nr, @@ -1631,6 +1633,24 @@ struct bch_sb_field_errors { LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16); LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64); +struct bch_sb_field_ext { + struct bch_sb_field field; + __le64 recovery_passes_required[2]; + __le64 errors_silent[8]; +}; + +struct bch_sb_field_downgrade_entry { + __le16 version; + __le64 recovery_passes[2]; + __le16 nr_errors; + __le16 errors[] __counted_by(nr_errors); +} __packed __aligned(2); + +struct bch_sb_field_downgrade { + struct bch_sb_field field; + struct bch_sb_field_downgrade_entry entries[]; +}; + /* Superblock: */ /* @@ -1644,6 +1664,11 @@ LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64); #define RECOVERY_PASS_ALL_FSCK (1ULL << 63) +/* + * field 1: version name + * field 2: BCH_VERSION(major, minor) + * field 3: recovery passess required on upgrade + */ #define BCH_METADATA_VERSIONS() \ x(bkey_renumber, BCH_VERSION(0, 10), \ RECOVERY_PASS_ALL_FSCK) \ diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 6be79129738d..da594e006769 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2085,18 +2085,16 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e goto out_no_locked; /* - * iter->pos should be mononotically increasing, and always be - * equal to the key we just returned - except extents can - * straddle iter->pos: + * We need to check against @end before FILTER_SNAPSHOTS because + * if we get to a different inode that requested we might be + * seeing keys for a different snapshot tree that will all be + * filtered out. 
+ * + * But we can't do the full check here, because bkey_start_pos() + * isn't monotonically increasing before FILTER_SNAPSHOTS, and + * that's what we check against in extents mode: */ - if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) - iter_pos = k.k->p; - else - iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k)); - - if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS) - ? bkey_gt(iter_pos, end) - : bkey_ge(iter_pos, end))) + if (k.k->p.inode > end.inode) goto end; if (iter->update_path && @@ -2155,6 +2153,21 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e continue; } + /* + * iter->pos should be monotonically increasing, and always be + * equal to the key we just returned - except extents can + * straddle iter->pos: + */ + if (!(iter->flags & BTREE_ITER_IS_EXTENTS)) + iter_pos = k.k->p; + else + iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k)); + + if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS) + ? bkey_gt(iter_pos, end) + : bkey_ge(iter_pos, end))) + goto end; + break; } diff --git a/fs/bcachefs/darray.c b/fs/bcachefs/darray.c new file mode 100644 index 000000000000..ac35b8b705ae --- /dev/null +++ b/fs/bcachefs/darray.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/log2.h> +#include <linux/slab.h> +#include "darray.h" + +int __bch2_darray_resize(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp) +{ + if (new_size > d->size) { + new_size = roundup_pow_of_two(new_size); + + void *data = kvmalloc_array(new_size, element_size, gfp); + if (!data) + return -ENOMEM; + + memcpy(data, d->data, d->size * element_size); + if (d->data != d->preallocated) /* preallocated[] is embedded in the darray struct itself, so it must not be freed */ + kvfree(d->data); + d->data = data; + d->size = new_size; + } + + return 0; +} diff --git a/fs/bcachefs/darray.h b/fs/bcachefs/darray.h index 87b4b2d1ec76..e367c625f057 100644 --- a/fs/bcachefs/darray.h +++ b/fs/bcachefs/darray.h @@ -8,39 +8,48 @@ * Inspired by CCAN's darray */ -#include "util.h" #include <linux/slab.h> -#define DARRAY(type) \ +#define DARRAY_PREALLOCATED(_type, _nr) 
\ struct { \ size_t nr, size; \ - type *data; \ + _type *data; \ + _type preallocated[_nr]; \ } -typedef DARRAY(void) darray_void; +#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0) -static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp) +typedef DARRAY(char) darray_char; + +int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t); + +static inline int __darray_resize(darray_char *d, size_t element_size, + size_t new_size, gfp_t gfp) { - if (d->nr + more > d->size) { - size_t new_size = roundup_pow_of_two(d->nr + more); - void *data = krealloc_array(d->data, new_size, t_size, gfp); + return unlikely(new_size > d->size) + ? __bch2_darray_resize(d, element_size, new_size, gfp) + : 0; +} - if (!data) - return -ENOMEM; +#define darray_resize_gfp(_d, _new_size, _gfp) \ + unlikely(__darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp)) - d->data = data; - d->size = new_size; - } +#define darray_resize(_d, _new_size) \ + darray_resize_gfp(_d, _new_size, GFP_KERNEL) - return 0; +static inline int __darray_make_room(darray_char *d, size_t t_size, size_t more, gfp_t gfp) +{ + return __darray_resize(d, t_size, d->nr + more, gfp); } #define darray_make_room_gfp(_d, _more, _gfp) \ - __darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more), _gfp) + __darray_make_room((darray_char *) (_d), sizeof((_d)->data[0]), (_more), _gfp) #define darray_make_room(_d, _more) \ darray_make_room_gfp(_d, _more, GFP_KERNEL) +#define darray_room(_d) ((_d).size - (_d).nr) + #define darray_top(_d) ((_d).data[(_d).nr]) #define darray_push_gfp(_d, _item, _gfp) \ @@ -80,13 +89,16 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, #define darray_init(_d) \ do { \ - (_d)->data = NULL; \ - (_d)->nr = (_d)->size = 0; \ + (_d)->nr = 0; \ + (_d)->size = ARRAY_SIZE((_d)->preallocated); \ + (_d)->data = (_d)->size ? 
(_d)->preallocated : NULL; \ } while (0) #define darray_exit(_d) \ do { \ - kfree((_d)->data); \ + if (!ARRAY_SIZE((_d)->preallocated) || \ + (_d)->data != (_d)->preallocated) \ + kvfree((_d)->data); \ darray_init(_d); \ } while (0) diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index ae7910bf2228..9ce29681eec9 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -95,6 +95,7 @@ x(ENOSPC, ENOSPC_sb_members) \ x(ENOSPC, ENOSPC_sb_members_v2) \ x(ENOSPC, ENOSPC_sb_crypt) \ + x(ENOSPC, ENOSPC_sb_downgrade) \ x(ENOSPC, ENOSPC_btree_slot) \ x(ENOSPC, ENOSPC_snapshot_tree) \ x(ENOENT, ENOENT_bkey_type_mismatch) \ @@ -218,6 +219,8 @@ x(BCH_ERR_invalid_sb, invalid_sb_quota) \ x(BCH_ERR_invalid_sb, invalid_sb_errors) \ x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \ + x(BCH_ERR_invalid_sb, invalid_sb_ext) \ + x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \ x(BCH_ERR_invalid, invalid_bkey) \ x(BCH_ERR_operation_blocked, nocow_lock_blocked) \ x(EIO, btree_node_read_err) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 7b28d37922fd..25cf78a7b946 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -152,6 +152,9 @@ int bch2_fsck_err(struct bch_fs *c, struct printbuf buf = PRINTBUF, *out = &buf; int ret = -BCH_ERR_fsck_ignore; + if (test_bit(err, c->sb.errors_silent)) + return -BCH_ERR_fsck_fix; + bch2_sb_error_count(c, err); va_start(args, fmt); diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 9a479e4de6b3..84e20c3ada6c 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -216,11 +216,11 @@ struct dio_write { struct address_space *mapping; struct bch_inode_info *inode; struct mm_struct *mm; + const struct iovec *iov; unsigned loop:1, extending:1, sync:1, - flush:1, - free_iov:1; + flush:1; struct quota_res quota_res; u64 written; @@ -312,12 +312,10 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio) return -1; if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) 
{ - iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), + dio->iov = iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov), GFP_KERNEL); if (unlikely(!iov)) return -ENOMEM; - - dio->free_iov = true; } memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov)); @@ -381,8 +379,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio) bch2_pagecache_block_put(inode); - if (dio->free_iov) - kfree(dio->iter.__iov); + kfree(dio->iov); ret = dio->op.error ?: ((long) dio->written << 9); bio_put(&dio->op.wbio.bio); @@ -626,11 +623,11 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) dio->mapping = mapping; dio->inode = inode; dio->mm = current->mm; + dio->iov = NULL; dio->loop = false; dio->extending = extending; dio->sync = is_sync_kiocb(req) || extending; dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled; - dio->free_iov = false; dio->quota_res.sectors = 0; dio->written = 0; dio->iter = *iter; diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index a70b7a03057d..14d5cc6f90d7 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -100,7 +100,8 @@ static int bch2_ioc_setflags(struct bch_fs *c, } mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s, + ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?: + bch2_write_inode(c, inode, bch2_inode_flags_set, &s, ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); @@ -183,13 +184,10 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c, } mutex_lock(&inode->ei_update_lock); - ret = bch2_set_projid(c, inode, fa.fsx_projid); - if (ret) - goto err_unlock; - - ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, + ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?: + bch2_set_projid(c, inode, fa.fsx_projid) ?: + bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, ATTR_CTIME); -err_unlock: mutex_unlock(&inode->ei_update_lock); err: inode_unlock(&inode->v); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c 
index ba93e32d7708..49da8db1d9e9 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -258,7 +258,8 @@ __bch2_create(struct mnt_idmap *idmap, retry: bch2_trans_begin(trans); - ret = bch2_create_trans(trans, + ret = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?: + bch2_create_trans(trans, inode_inum(dir), &dir_u, &inode_u, !(flags & BCH_CREATE_TMPFILE) ? &dentry->d_name : NULL, @@ -430,7 +431,9 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir, lockdep_assert_held(&inode->v.i_rwsem); - ret = __bch2_link(c, inode, dir, dentry); + ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?: + bch2_subvol_is_ro(c, inode->ei_subvol) ?: + __bch2_link(c, inode, dir, dentry); if (unlikely(ret)) return ret; @@ -481,7 +484,11 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, static int bch2_unlink(struct inode *vdir, struct dentry *dentry) { - return __bch2_unlink(vdir, dentry, false); + struct bch_inode_info *dir= to_bch_ei(vdir); + struct bch_fs *c = dir->v.i_sb->s_fs_info; + + return bch2_subvol_is_ro(c, dir->ei_subvol) ?: + __bch2_unlink(vdir, dentry, false); } static int bch2_symlink(struct mnt_idmap *idmap, @@ -562,6 +569,11 @@ static int bch2_rename2(struct mnt_idmap *idmap, src_inode, dst_inode); + ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?: + bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol); + if (ret) + goto err; + if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) { ret = bch2_fs_quota_transfer(c, src_inode, dst_dir->ei_qid, @@ -783,11 +795,13 @@ static int bch2_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + struct bch_fs *c = inode->v.i_sb->s_fs_info; int ret; lockdep_assert_held(&inode->v.i_rwsem); - ret = setattr_prepare(idmap, dentry, iattr); + ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?: + setattr_prepare(idmap, dentry, iattr); if (ret) return ret; @@ -1010,12 +1024,26 @@ static int bch2_vfs_readdir(struct 
file *file, struct dir_context *ctx) return bch2_err_class(ret); } +static int bch2_open(struct inode *vinode, struct file *file) +{ + if (file->f_flags & (O_WRONLY|O_RDWR)) { + struct bch_inode_info *inode = to_bch_ei(vinode); + struct bch_fs *c = inode->v.i_sb->s_fs_info; + + int ret = bch2_subvol_is_ro(c, inode->ei_subvol); + if (ret) + return ret; + } + + return generic_file_open(vinode, file); +} + static const struct file_operations bch_file_operations = { + .open = bch2_open, .llseek = bch2_llseek, .read_iter = bch2_read_iter, .write_iter = bch2_write_iter, .mmap = bch2_mmap, - .open = generic_file_open, .fsync = bch2_fsync, .splice_read = filemap_splice_read, .splice_write = iter_file_splice_write, diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index 8ede46b1e354..8c8cb1541ac9 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1216,6 +1216,12 @@ static CLOSURE_CALLBACK(bch2_nocow_write_done) bch2_write_done(cl); } +struct bucket_to_lock { + struct bpos b; + unsigned gen; + struct nocow_lock_bucket *l; +}; + static void bch2_nocow_write(struct bch_write_op *op) { struct bch_fs *c = op->c; @@ -1224,18 +1230,16 @@ static void bch2_nocow_write(struct bch_write_op *op) struct bkey_s_c k; struct bkey_ptrs_c ptrs; const struct bch_extent_ptr *ptr; - struct { - struct bpos b; - unsigned gen; - struct nocow_lock_bucket *l; - } buckets[BCH_REPLICAS_MAX]; - unsigned nr_buckets = 0; + DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets; + struct bucket_to_lock *i; u32 snapshot; - int ret, i; + struct bucket_to_lock *stale_at; + int ret; if (op->flags & BCH_WRITE_MOVE) return; + darray_init(&buckets); trans = bch2_trans_get(c); retry: bch2_trans_begin(trans); @@ -1250,7 +1254,7 @@ static void bch2_nocow_write(struct bch_write_op *op) while (1) { struct bio *bio = &op->wbio.bio; - nr_buckets = 0; + buckets.nr = 0; k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); @@ -1263,26 +1267,26 @@ static void bch2_nocow_write(struct 
bch_write_op *op) break; if (bch2_keylist_realloc(&op->insert_keys, - op->inline_keys, - ARRAY_SIZE(op->inline_keys), - k.k->u64s)) + op->inline_keys, + ARRAY_SIZE(op->inline_keys), + k.k->u64s)) break; /* Get iorefs before dropping btree locks: */ ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr) { - buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr); - buckets[nr_buckets].gen = ptr->gen; - buckets[nr_buckets].l = - bucket_nocow_lock(&c->nocow_locks, - bucket_to_u64(buckets[nr_buckets].b)); - - prefetch(buckets[nr_buckets].l); + struct bpos b = PTR_BUCKET_POS(c, ptr); + struct nocow_lock_bucket *l = + bucket_nocow_lock(&c->nocow_locks, bucket_to_u64(b)); + prefetch(l); if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE))) goto err_get_ioref; - nr_buckets++; + /* XXX allocating memory with btree locks held - rare */ + darray_push_gfp(&buckets, ((struct bucket_to_lock) { + .b = b, .gen = ptr->gen, .l = l, + }), GFP_KERNEL|__GFP_NOFAIL); if (ptr->unwritten) op->flags |= BCH_WRITE_CONVERT_UNWRITTEN; @@ -1296,21 +1300,21 @@ static void bch2_nocow_write(struct bch_write_op *op) if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN) bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top); - for (i = 0; i < nr_buckets; i++) { - struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode); - struct nocow_lock_bucket *l = buckets[i].l; - bool stale; + darray_for_each(buckets, i) { + struct bch_dev *ca = bch_dev_bkey_exists(c, i->b.inode); - __bch2_bucket_nocow_lock(&c->nocow_locks, l, - bucket_to_u64(buckets[i].b), + __bch2_bucket_nocow_lock(&c->nocow_locks, i->l, + bucket_to_u64(i->b), BUCKET_NOCOW_LOCK_UPDATE); rcu_read_lock(); - stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen); + bool stale = gen_after(*bucket_gen(ca, i->b.offset), i->gen); rcu_read_unlock(); - if (unlikely(stale)) + if (unlikely(stale)) { + stale_at = i; goto err_bucket_stale; + } } bio = &op->wbio.bio; @@ -1346,15 +1350,14 @@ 
static void bch2_nocow_write(struct bch_write_op *op) if (ret) { bch_err_inum_offset_ratelimited(c, - op->pos.inode, - op->pos.offset << 9, - "%s: btree lookup error %s", - __func__, bch2_err_str(ret)); + op->pos.inode, op->pos.offset << 9, + "%s: btree lookup error %s", __func__, bch2_err_str(ret)); op->error = ret; op->flags |= BCH_WRITE_DONE; } bch2_trans_put(trans); + darray_exit(&buckets); /* fallback to cow write path? */ if (!(op->flags & BCH_WRITE_DONE)) { @@ -1374,24 +1377,21 @@ static void bch2_nocow_write(struct bch_write_op *op) } return; err_get_ioref: - for (i = 0; i < nr_buckets; i++) - percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref); + darray_for_each(buckets, i) + percpu_ref_put(&bch_dev_bkey_exists(c, i->b.inode)->io_ref); /* Fall back to COW path: */ goto out; err_bucket_stale: - while (i >= 0) { - bch2_bucket_nocow_unlock(&c->nocow_locks, - buckets[i].b, - BUCKET_NOCOW_LOCK_UPDATE); - --i; + darray_for_each(buckets, i) { + bch2_bucket_nocow_unlock(&c->nocow_locks, i->b, BUCKET_NOCOW_LOCK_UPDATE); + if (i == stale_at) + break; } - for (i = 0; i < nr_buckets; i++) - percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref); /* We can retry this: */ ret = -BCH_ERR_transaction_restart; - goto out; + goto err_get_ioref; } static void __bch2_write(struct bch_write_op *op) diff --git a/fs/bcachefs/printbuf.c b/fs/bcachefs/printbuf.c index 5e653eb81d54..accf246c3233 100644 --- a/fs/bcachefs/printbuf.c +++ b/fs/bcachefs/printbuf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: LGPL-2.1+ /* Copyright (C) 2022 Kent Overstreet */ +#include #include #include #include @@ -423,3 +424,24 @@ void bch2_prt_bitflags(struct printbuf *out, flags ^= BIT_ULL(bit); } } + +void bch2_prt_bitflags_vector(struct printbuf *out, + const char * const list[], + unsigned long *v, unsigned nr) +{ + bool first = true; + unsigned i; + + for (i = 0; i < nr; i++) + if (!list[i]) { + nr = i; /* list[i] is the NULL terminator: only bits [0, i) have names, and the bound below is exclusive */ + break; + } + + for_each_set_bit(i, v, nr) { + if (!first) + 
bch2_prt_printf(out, ","); + first = false; + bch2_prt_printf(out, "%s", list[i]); + } +} diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h index 2191423d9f22..9a4a56c40937 100644 --- a/fs/bcachefs/printbuf.h +++ b/fs/bcachefs/printbuf.h @@ -124,6 +124,8 @@ void bch2_prt_units_u64(struct printbuf *, u64); void bch2_prt_units_s64(struct printbuf *, s64); void bch2_prt_string_option(struct printbuf *, const char * const[], size_t); void bch2_prt_bitflags(struct printbuf *, const char * const[], u64); +void bch2_prt_bitflags_vector(struct printbuf *, const char * const[], + unsigned long *, unsigned); /* Initializer for a heap allocated printbuf: */ #define PRINTBUF ((struct printbuf) { .heap_allocated = true }) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index c7d9074c82d9..5cf7d0532002 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -27,6 +27,7 @@ #include "recovery.h" #include "replicas.h" #include "sb-clean.h" +#include "sb-downgrade.h" #include "snapshot.h" #include "subvolume.h" #include "super-io.h" @@ -481,7 +482,7 @@ static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) } const char * const bch2_recovery_passes[] = { -#define x(_fn, _when) #_fn, +#define x(_fn, ...) #_fn, BCH_RECOVERY_PASSES() #undef x NULL @@ -504,18 +505,47 @@ struct recovery_pass_fn { }; static struct recovery_pass_fn recovery_pass_fns[] = { -#define x(_fn, _when) { .fn = bch2_##_fn, .when = _when }, +#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when }, BCH_RECOVERY_PASSES() #undef x }; -static void check_version_upgrade(struct bch_fs *c) +u64 bch2_recovery_passes_to_stable(u64 v) +{ + static const u8 map[] = { +#define x(n, id, ...) 
[BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n, + BCH_RECOVERY_PASSES() +#undef x + }; + + u64 ret = 0; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) + if (v & BIT_ULL(i)) + ret |= BIT_ULL(map[i]); + return ret; +} + +u64 bch2_recovery_passes_from_stable(u64 v) +{ + static const u8 map[] = { +#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n, + BCH_RECOVERY_PASSES() +#undef x + }; + + u64 ret = 0; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) + if (v & BIT_ULL(i)) + ret |= BIT_ULL(map[i]); + return ret; +} + +static bool check_version_upgrade(struct bch_fs *c) { unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version); unsigned latest_version = bcachefs_metadata_version_current; unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; unsigned new_version = 0; - u64 recovery_passes; if (old_version < bcachefs_metadata_required_upgrade_below) { if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || @@ -559,7 +589,7 @@ static void check_version_upgrade(struct bch_fs *c) bch2_version_to_text(&buf, new_version); prt_newline(&buf); - recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version); + u64 recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version); if (recovery_passes) { if ((recovery_passes & RECOVERY_PASS_ALL_FSCK) == RECOVERY_PASS_ALL_FSCK) prt_str(&buf, "fsck required"); @@ -574,12 +604,13 @@ static void check_version_upgrade(struct bch_fs *c) bch_info(c, "%s", buf.buf); - mutex_lock(&c->sb_lock); bch2_sb_upgrade(c, new_version); - mutex_unlock(&c->sb_lock); printbuf_exit(&buf); + return true; } + + return false; } u64 bch2_fsck_recovery_passes(void) @@ -654,7 +685,6 @@ int bch2_fs_recovery(struct bch_fs *c) struct bch_sb_field_clean *clean = NULL; struct jset *last_journal_entry = NULL; u64 last_seq = 0, blacklist_seq, journal_seq; - bool write_sb = false; int ret = 0; if (c->sb.clean) { @@ -682,15 +712,73 @@ int bch2_fs_recovery(struct 
bch_fs *c) goto err; } - if (c->opts.fsck || !(c->opts.nochanges && c->opts.norecovery)) - check_version_upgrade(c); - if (c->opts.fsck && c->opts.norecovery) { bch_err(c, "cannot select both norecovery and fsck"); ret = -EINVAL; goto err; } + if (!(c->opts.nochanges && c->opts.norecovery)) { + mutex_lock(&c->sb_lock); + bool write_sb = false; + + struct bch_sb_field_ext *ext = + bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64)); + if (!ext) { + ret = -BCH_ERR_ENOSPC_sb; + mutex_unlock(&c->sb_lock); + goto err; + } + + if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) { + ext->recovery_passes_required[0] |= + cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology))); + write_sb = true; + } + + u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + if (sb_passes) { + struct printbuf buf = PRINTBUF; + prt_str(&buf, "superblock requires following recovery passes to be run:\n "); + prt_bitflags(&buf, bch2_recovery_passes, sb_passes); + bch_info(c, "%s", buf.buf); + printbuf_exit(&buf); + } + + if (bch2_check_version_downgrade(c)) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "Version downgrade required:\n"); + + __le64 passes = ext->recovery_passes_required[0]; + bch2_sb_set_downgrade(c, + BCH_VERSION_MINOR(bcachefs_metadata_version_current), + BCH_VERSION_MINOR(c->sb.version)); + passes = ext->recovery_passes_required[0] & ~passes; + if (passes) { + prt_str(&buf, " running recovery passes: "); + prt_bitflags(&buf, bch2_recovery_passes, + bch2_recovery_passes_from_stable(le64_to_cpu(passes))); + } + + bch_info(c, "%s", buf.buf); + printbuf_exit(&buf); + write_sb = true; + } + + if (check_version_upgrade(c)) + write_sb = true; + + if (write_sb) + bch2_write_super(c); + + c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + mutex_unlock(&c->sb_lock); + } + + if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) 
+ c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); + ret = bch2_blacklist_table_initialize(c); if (ret) { bch_err(c, "error initializing blacklist table"); @@ -827,11 +915,6 @@ int bch2_fs_recovery(struct bch_fs *c) if (ret) goto err; - if (c->opts.fsck && - (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) || - BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb))) - c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); - ret = bch2_run_recovery_passes(c); if (ret) goto err; @@ -868,14 +951,28 @@ int bch2_fs_recovery(struct bch_fs *c) } mutex_lock(&c->sb_lock); - if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != c->sb.version) { - SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, c->sb.version); + bool write_sb = false; + + if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) { + SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version)); + write_sb = true; + } + + if (!test_bit(BCH_FS_ERROR, &c->flags) && + !(c->disk_sb.sb->compat[0] & cpu_to_le64(1ULL << BCH_COMPAT_alloc_info))) { + c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info); write_sb = true; } if (!test_bit(BCH_FS_ERROR, &c->flags)) { - c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info); - write_sb = true; + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + if (ext && + (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) || + !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) { + memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required)); + memset(ext->errors_silent, 0, sizeof(ext->errors_silent)); + write_sb = true; + } } if (c->opts.fsck && @@ -947,7 +1044,7 @@ int bch2_fs_initialize(struct bch_fs *c) c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); - 
bch2_sb_maybe_downgrade(c); + bch2_check_version_downgrade(c); if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { bch2_sb_upgrade(c, bcachefs_metadata_version_current); diff --git a/fs/bcachefs/recovery.h b/fs/bcachefs/recovery.h index d266aae90200..3a554b0751d0 100644 --- a/fs/bcachefs/recovery.h +++ b/fs/bcachefs/recovery.h @@ -4,6 +4,9 @@ extern const char * const bch2_recovery_passes[]; +u64 bch2_recovery_passes_to_stable(u64 v); +u64 bch2_recovery_passes_from_stable(u64 v); + /* * For when we need to rewind recovery passes and run a pass we skipped: */ diff --git a/fs/bcachefs/recovery_types.h b/fs/bcachefs/recovery_types.h index 515e3d62c2ac..d37c6fd30e38 100644 --- a/fs/bcachefs/recovery_types.h +++ b/fs/bcachefs/recovery_types.h @@ -7,45 +7,57 @@ #define PASS_UNCLEAN BIT(2) #define PASS_ALWAYS BIT(3) -#define BCH_RECOVERY_PASSES() \ - x(alloc_read, PASS_ALWAYS) \ - x(stripes_read, PASS_ALWAYS) \ - x(initialize_subvolumes, 0) \ - x(snapshots_read, PASS_ALWAYS) \ - x(check_topology, 0) \ - x(check_allocations, PASS_FSCK) \ - x(trans_mark_dev_sbs, PASS_ALWAYS|PASS_SILENT) \ - x(fs_journal_alloc, PASS_ALWAYS|PASS_SILENT) \ - x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \ - x(journal_replay, PASS_ALWAYS) \ - x(check_alloc_info, PASS_FSCK) \ - x(check_lrus, PASS_FSCK) \ - x(check_btree_backpointers, PASS_FSCK) \ - x(check_backpointers_to_extents,PASS_FSCK) \ - x(check_extents_to_backpointers,PASS_FSCK) \ - x(check_alloc_to_lru_refs, PASS_FSCK) \ - x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \ - x(bucket_gens_init, 0) \ - x(check_snapshot_trees, PASS_FSCK) \ - x(check_snapshots, PASS_FSCK) \ - x(check_subvols, PASS_FSCK) \ - x(delete_dead_snapshots, PASS_FSCK) \ - x(fs_upgrade_for_subvolumes, 0) \ - x(resume_logged_ops, PASS_ALWAYS) \ - x(check_inodes, PASS_FSCK) \ - x(check_extents, PASS_FSCK) \ - x(check_indirect_extents, PASS_FSCK) \ - x(check_dirents, PASS_FSCK) \ - x(check_xattrs, PASS_FSCK) \ - x(check_root, PASS_FSCK) \ - x(check_directory_structure, 
PASS_FSCK) \ - x(check_nlinks, PASS_FSCK) \ - x(delete_dead_inodes, PASS_FSCK|PASS_UNCLEAN) \ - x(fix_reflink_p, 0) \ - x(set_fs_needs_rebalance, 0) \ +/* + * Passes may be reordered, but the second field is a persistent identifier and + * must never change: + */ +#define BCH_RECOVERY_PASSES() \ + x(alloc_read, 0, PASS_ALWAYS) \ + x(stripes_read, 1, PASS_ALWAYS) \ + x(initialize_subvolumes, 2, 0) \ + x(snapshots_read, 3, PASS_ALWAYS) \ + x(check_topology, 4, 0) \ + x(check_allocations, 5, PASS_FSCK) \ + x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \ + x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \ + x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \ + x(journal_replay, 9, PASS_ALWAYS) \ + x(check_alloc_info, 10, PASS_FSCK) \ + x(check_lrus, 11, PASS_FSCK) \ + x(check_btree_backpointers, 12, PASS_FSCK) \ + x(check_backpointers_to_extents, 13, PASS_FSCK) \ + x(check_extents_to_backpointers, 14, PASS_FSCK) \ + x(check_alloc_to_lru_refs, 15, PASS_FSCK) \ + x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ + x(bucket_gens_init, 17, 0) \ + x(check_snapshot_trees, 18, PASS_FSCK) \ + x(check_snapshots, 19, PASS_FSCK) \ + x(check_subvols, 20, PASS_FSCK) \ + x(delete_dead_snapshots, 21, PASS_FSCK) \ + x(fs_upgrade_for_subvolumes, 22, 0) \ + x(resume_logged_ops, 23, PASS_ALWAYS) \ + x(check_inodes, 24, PASS_FSCK) \ + x(check_extents, 25, PASS_FSCK) \ + x(check_indirect_extents, 26, PASS_FSCK) \ + x(check_dirents, 27, PASS_FSCK) \ + x(check_xattrs, 28, PASS_FSCK) \ + x(check_root, 29, PASS_FSCK) \ + x(check_directory_structure, 30, PASS_FSCK) \ + x(check_nlinks, 31, PASS_FSCK) \ + x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \ + x(fix_reflink_p, 33, 0) \ + x(set_fs_needs_rebalance, 34, 0) \ +/* We normally enumerate recovery passes in the order we run them: */ enum bch_recovery_pass { -#define x(n, when) BCH_RECOVERY_PASS_##n, +#define x(n, id, when) BCH_RECOVERY_PASS_##n, + BCH_RECOVERY_PASSES() +#undef x +}; + +/* But we also need stable identifiers that can be 
used in the superblock */ +enum bch_recovery_pass_stable { +#define x(n, id, when) BCH_RECOVERY_PASS_STABLE_##n = id, BCH_RECOVERY_PASSES() #undef x }; diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index e151ada1c8bd..c76ad8ea5e4a 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -332,8 +332,6 @@ int bch2_fs_mark_dirty(struct bch_fs *c) mutex_lock(&c->sb_lock); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); - - bch2_sb_maybe_downgrade(c); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS); ret = bch2_write_super(c); diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c new file mode 100644 index 000000000000..4919237bbe73 --- /dev/null +++ b/fs/bcachefs/sb-downgrade.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Superblock section that contains a list of recovery passes to run when + * downgrading past a given version + */ + +#include "bcachefs.h" +#include "darray.h" +#include "recovery.h" +#include "sb-downgrade.h" +#include "sb-errors.h" +#include "super-io.h" + +/* + * Downgrade table: + * When downgrading past certain versions, we need to run certain recovery passes + * and fix certain errors: + * + * x(version, recovery_passes, errors...) + */ + +#define DOWNGRADE_TABLE() + +struct downgrade_entry { + u64 recovery_passes; + u16 version; + u16 nr_errors; + const u16 *errors; +}; + +#define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ }; /* one error array per table row, named after that row's version */ +DOWNGRADE_TABLE() +#undef x + +static const struct downgrade_entry downgrade_table[] = { +#define x(ver, passes, ...) 
{ \ + .recovery_passes = passes, \ + .version = bcachefs_metadata_version_##ver,\ + .nr_errors = ARRAY_SIZE(downgrade_##ver##_errors), \ + .errors = downgrade_##ver##_errors, \ +}, +DOWNGRADE_TABLE() +#undef x +}; + +static inline const struct bch_sb_field_downgrade_entry * +downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e) +{ + return (void *) &e->errors[le16_to_cpu(e->nr_errors)]; +} + +#define for_each_downgrade_entry(_d, _i) \ + for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries; \ + (void *) _i < vstruct_end(&(_d)->field) && \ + (void *) &_i->errors[0] < vstruct_end(&(_d)->field); \ + _i = downgrade_entry_next_c(_i)) + +static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f, + struct printbuf *err) +{ + struct bch_sb_field_downgrade *e = field_to_type(f, downgrade); + + for_each_downgrade_entry(e, i) { + if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) != + BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) { + prt_printf(err, "downgrade entry with mismatched major version (%u != %u)", + BCH_VERSION_MAJOR(le16_to_cpu(i->version)), + BCH_VERSION_MAJOR(le16_to_cpu(sb->version))); + return -BCH_ERR_invalid_sb_downgrade; + } + } + + return 0; +} + +static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) +{ + struct bch_sb_field_downgrade *e = field_to_type(f, downgrade); + + if (out->nr_tabstops <= 1) + printbuf_tabstop_push(out, 16); + + for_each_downgrade_entry(e, i) { + prt_str(out, "version:"); + prt_tab(out); + bch2_version_to_text(out, le16_to_cpu(i->version)); + prt_newline(out); + + prt_str(out, "recovery passes:"); + prt_tab(out); + prt_bitflags(out, bch2_recovery_passes, + bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0]))); + prt_newline(out); + + prt_str(out, "errors:"); + prt_tab(out); + bool first = true; + for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) { + if (!first) + prt_char(out, ','); + first = false; + unsigned e = 
le16_to_cpu(i->errors[j]); + prt_str(out, e < BCH_SB_ERR_MAX ? bch2_sb_error_strs[e] : "(unknown)"); + } + prt_newline(out); + } +} + +const struct bch_sb_field_ops bch_sb_field_ops_downgrade = { + .validate = bch2_sb_downgrade_validate, + .to_text = bch2_sb_downgrade_to_text, +}; + +int bch2_sb_downgrade_update(struct bch_fs *c) +{ + darray_char table = {}; + int ret = 0; + + for (const struct downgrade_entry *src = downgrade_table; + src < downgrade_table + ARRAY_SIZE(downgrade_table); + src++) { + if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) + continue; + + struct bch_sb_field_downgrade_entry *dst; + unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors; + + ret = darray_make_room(&table, bytes); + if (ret) + goto out; + + dst = (void *) &darray_top(table); + dst->version = cpu_to_le16(src->version); + dst->recovery_passes[0] = cpu_to_le64(src->recovery_passes); + dst->recovery_passes[1] = 0; + dst->nr_errors = cpu_to_le16(src->nr_errors); + for (unsigned i = 0; i < src->nr_errors; i++) + dst->errors[i] = cpu_to_le16(src->errors[i]); + + table.nr += bytes; + } + + struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade); + + unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64)); + + if (d && le32_to_cpu(d->field.u64s) > sb_u64s) + goto out; + + d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s); + if (!d) { + ret = -BCH_ERR_ENOSPC_sb_downgrade; + goto out; + } + + memcpy(d->entries, table.data, table.nr); + memset_u64s_tail(d->entries, 0, table.nr); +out: + darray_exit(&table); + return ret; +} + +void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor) +{ + struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade); + if (!d) + return; + + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + + for_each_downgrade_entry(d, i) { + unsigned minor = 
BCH_VERSION_MINOR(le16_to_cpu(i->version)); + if (new_minor < minor && minor <= old_minor) { + ext->recovery_passes_required[0] |= i->recovery_passes[0]; + ext->recovery_passes_required[1] |= i->recovery_passes[1]; + + for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) { + unsigned e = le16_to_cpu(i->errors[j]); + if (e < BCH_SB_ERR_MAX) + __set_bit(e, c->sb.errors_silent); + if (e < sizeof(ext->errors_silent) * 8) + ext->errors_silent[e / 64] |= cpu_to_le64(BIT_ULL(e % 64)); + } + } + } +} diff --git a/fs/bcachefs/sb-downgrade.h b/fs/bcachefs/sb-downgrade.h new file mode 100644 index 000000000000..bc48fd2ca70e --- /dev/null +++ b/fs/bcachefs/sb-downgrade.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_SB_DOWNGRADE_H +#define _BCACHEFS_SB_DOWNGRADE_H + +extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade; + +int bch2_sb_downgrade_update(struct bch_fs *); +void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned); + +#endif /* _BCACHEFS_SB_DOWNGRADE_H */ diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c index f0930ab7f036..5f5bcae391fb 100644 --- a/fs/bcachefs/sb-errors.c +++ b/fs/bcachefs/sb-errors.c @@ -4,7 +4,7 @@ #include "sb-errors.h" #include "super-io.h" -static const char * const bch2_sb_error_strs[] = { +const char * const bch2_sb_error_strs[] = { #define x(t, n, ...) [n] = #t, BCH_SB_ERRS() NULL @@ -20,9 +20,7 @@ static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e) { - return e - ? 
(bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0]) - : 0; + return bch2_sb_field_nr_entries(e); } static inline unsigned bch2_sb_field_errors_u64s(unsigned nr) diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h index 7557fe94f06d..8889001e7db4 100644 --- a/fs/bcachefs/sb-errors.h +++ b/fs/bcachefs/sb-errors.h @@ -4,258 +4,7 @@ #include "sb-errors_types.h" -#define BCH_SB_ERRS() \ - x(clean_but_journal_not_empty, 0) \ - x(dirty_but_no_journal_entries, 1) \ - x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \ - x(sb_clean_journal_seq_mismatch, 3) \ - x(sb_clean_btree_root_mismatch, 4) \ - x(sb_clean_missing, 5) \ - x(jset_unsupported_version, 6) \ - x(jset_unknown_csum, 7) \ - x(jset_last_seq_newer_than_seq, 8) \ - x(jset_past_bucket_end, 9) \ - x(jset_seq_blacklisted, 10) \ - x(journal_entries_missing, 11) \ - x(journal_entry_replicas_not_marked, 12) \ - x(journal_entry_past_jset_end, 13) \ - x(journal_entry_replicas_data_mismatch, 14) \ - x(journal_entry_bkey_u64s_0, 15) \ - x(journal_entry_bkey_past_end, 16) \ - x(journal_entry_bkey_bad_format, 17) \ - x(journal_entry_bkey_invalid, 18) \ - x(journal_entry_btree_root_bad_size, 19) \ - x(journal_entry_blacklist_bad_size, 20) \ - x(journal_entry_blacklist_v2_bad_size, 21) \ - x(journal_entry_blacklist_v2_start_past_end, 22) \ - x(journal_entry_usage_bad_size, 23) \ - x(journal_entry_data_usage_bad_size, 24) \ - x(journal_entry_clock_bad_size, 25) \ - x(journal_entry_clock_bad_rw, 26) \ - x(journal_entry_dev_usage_bad_size, 27) \ - x(journal_entry_dev_usage_bad_dev, 28) \ - x(journal_entry_dev_usage_bad_pad, 29) \ - x(btree_node_unreadable, 30) \ - x(btree_node_fault_injected, 31) \ - x(btree_node_bad_magic, 32) \ - x(btree_node_bad_seq, 33) \ - x(btree_node_unsupported_version, 34) \ - x(btree_node_bset_older_than_sb_min, 35) \ - x(btree_node_bset_newer_than_sb, 36) \ - x(btree_node_data_missing, 37) \ - x(btree_node_bset_after_end, 38) \ - 
x(btree_node_replicas_sectors_written_mismatch, 39) \ - x(btree_node_replicas_data_mismatch, 40) \ - x(bset_unknown_csum, 41) \ - x(bset_bad_csum, 42) \ - x(bset_past_end_of_btree_node, 43) \ - x(bset_wrong_sector_offset, 44) \ - x(bset_empty, 45) \ - x(bset_bad_seq, 46) \ - x(bset_blacklisted_journal_seq, 47) \ - x(first_bset_blacklisted_journal_seq, 48) \ - x(btree_node_bad_btree, 49) \ - x(btree_node_bad_level, 50) \ - x(btree_node_bad_min_key, 51) \ - x(btree_node_bad_max_key, 52) \ - x(btree_node_bad_format, 53) \ - x(btree_node_bkey_past_bset_end, 54) \ - x(btree_node_bkey_bad_format, 55) \ - x(btree_node_bad_bkey, 56) \ - x(btree_node_bkey_out_of_order, 57) \ - x(btree_root_bkey_invalid, 58) \ - x(btree_root_read_error, 59) \ - x(btree_root_bad_min_key, 60) \ - x(btree_root_bad_max_key, 61) \ - x(btree_node_read_error, 62) \ - x(btree_node_topology_bad_min_key, 63) \ - x(btree_node_topology_bad_max_key, 64) \ - x(btree_node_topology_overwritten_by_prev_node, 65) \ - x(btree_node_topology_overwritten_by_next_node, 66) \ - x(btree_node_topology_interior_node_empty, 67) \ - x(fs_usage_hidden_wrong, 68) \ - x(fs_usage_btree_wrong, 69) \ - x(fs_usage_data_wrong, 70) \ - x(fs_usage_cached_wrong, 71) \ - x(fs_usage_reserved_wrong, 72) \ - x(fs_usage_persistent_reserved_wrong, 73) \ - x(fs_usage_nr_inodes_wrong, 74) \ - x(fs_usage_replicas_wrong, 75) \ - x(dev_usage_buckets_wrong, 76) \ - x(dev_usage_sectors_wrong, 77) \ - x(dev_usage_fragmented_wrong, 78) \ - x(dev_usage_buckets_ec_wrong, 79) \ - x(bkey_version_in_future, 80) \ - x(bkey_u64s_too_small, 81) \ - x(bkey_invalid_type_for_btree, 82) \ - x(bkey_extent_size_zero, 83) \ - x(bkey_extent_size_greater_than_offset, 84) \ - x(bkey_size_nonzero, 85) \ - x(bkey_snapshot_nonzero, 86) \ - x(bkey_snapshot_zero, 87) \ - x(bkey_at_pos_max, 88) \ - x(bkey_before_start_of_btree_node, 89) \ - x(bkey_after_end_of_btree_node, 90) \ - x(bkey_val_size_nonzero, 91) \ - x(bkey_val_size_too_small, 92) \ - 
x(alloc_v1_val_size_bad, 93) \ - x(alloc_v2_unpack_error, 94) \ - x(alloc_v3_unpack_error, 95) \ - x(alloc_v4_val_size_bad, 96) \ - x(alloc_v4_backpointers_start_bad, 97) \ - x(alloc_key_data_type_bad, 98) \ - x(alloc_key_empty_but_have_data, 99) \ - x(alloc_key_dirty_sectors_0, 100) \ - x(alloc_key_data_type_inconsistency, 101) \ - x(alloc_key_to_missing_dev_bucket, 102) \ - x(alloc_key_cached_inconsistency, 103) \ - x(alloc_key_cached_but_read_time_zero, 104) \ - x(alloc_key_to_missing_lru_entry, 105) \ - x(alloc_key_data_type_wrong, 106) \ - x(alloc_key_gen_wrong, 107) \ - x(alloc_key_dirty_sectors_wrong, 108) \ - x(alloc_key_cached_sectors_wrong, 109) \ - x(alloc_key_stripe_wrong, 110) \ - x(alloc_key_stripe_redundancy_wrong, 111) \ - x(bucket_sector_count_overflow, 112) \ - x(bucket_metadata_type_mismatch, 113) \ - x(need_discard_key_wrong, 114) \ - x(freespace_key_wrong, 115) \ - x(freespace_hole_missing, 116) \ - x(bucket_gens_val_size_bad, 117) \ - x(bucket_gens_key_wrong, 118) \ - x(bucket_gens_hole_wrong, 119) \ - x(bucket_gens_to_invalid_dev, 120) \ - x(bucket_gens_to_invalid_buckets, 121) \ - x(bucket_gens_nonzero_for_invalid_buckets, 122) \ - x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \ - x(need_discard_freespace_key_bad, 124) \ - x(backpointer_pos_wrong, 125) \ - x(backpointer_to_missing_device, 126) \ - x(backpointer_to_missing_alloc, 127) \ - x(backpointer_to_missing_ptr, 128) \ - x(lru_entry_at_time_0, 129) \ - x(lru_entry_to_invalid_bucket, 130) \ - x(lru_entry_bad, 131) \ - x(btree_ptr_val_too_big, 132) \ - x(btree_ptr_v2_val_too_big, 133) \ - x(btree_ptr_has_non_ptr, 134) \ - x(extent_ptrs_invalid_entry, 135) \ - x(extent_ptrs_no_ptrs, 136) \ - x(extent_ptrs_too_many_ptrs, 137) \ - x(extent_ptrs_redundant_crc, 138) \ - x(extent_ptrs_redundant_stripe, 139) \ - x(extent_ptrs_unwritten, 140) \ - x(extent_ptrs_written_and_unwritten, 141) \ - x(ptr_to_invalid_device, 142) \ - x(ptr_to_duplicate_device, 143) \ - x(ptr_after_last_bucket, 
144) \ - x(ptr_before_first_bucket, 145) \ - x(ptr_spans_multiple_buckets, 146) \ - x(ptr_to_missing_backpointer, 147) \ - x(ptr_to_missing_alloc_key, 148) \ - x(ptr_to_missing_replicas_entry, 149) \ - x(ptr_to_missing_stripe, 150) \ - x(ptr_to_incorrect_stripe, 151) \ - x(ptr_gen_newer_than_bucket_gen, 152) \ - x(ptr_too_stale, 153) \ - x(stale_dirty_ptr, 154) \ - x(ptr_bucket_data_type_mismatch, 155) \ - x(ptr_cached_and_erasure_coded, 156) \ - x(ptr_crc_uncompressed_size_too_small, 157) \ - x(ptr_crc_csum_type_unknown, 158) \ - x(ptr_crc_compression_type_unknown, 159) \ - x(ptr_crc_redundant, 160) \ - x(ptr_crc_uncompressed_size_too_big, 161) \ - x(ptr_crc_nonce_mismatch, 162) \ - x(ptr_stripe_redundant, 163) \ - x(reservation_key_nr_replicas_invalid, 164) \ - x(reflink_v_refcount_wrong, 165) \ - x(reflink_p_to_missing_reflink_v, 166) \ - x(stripe_pos_bad, 167) \ - x(stripe_val_size_bad, 168) \ - x(stripe_sector_count_wrong, 169) \ - x(snapshot_tree_pos_bad, 170) \ - x(snapshot_tree_to_missing_snapshot, 171) \ - x(snapshot_tree_to_missing_subvol, 172) \ - x(snapshot_tree_to_wrong_subvol, 173) \ - x(snapshot_tree_to_snapshot_subvol, 174) \ - x(snapshot_pos_bad, 175) \ - x(snapshot_parent_bad, 176) \ - x(snapshot_children_not_normalized, 177) \ - x(snapshot_child_duplicate, 178) \ - x(snapshot_child_bad, 179) \ - x(snapshot_skiplist_not_normalized, 180) \ - x(snapshot_skiplist_bad, 181) \ - x(snapshot_should_not_have_subvol, 182) \ - x(snapshot_to_bad_snapshot_tree, 183) \ - x(snapshot_bad_depth, 184) \ - x(snapshot_bad_skiplist, 185) \ - x(subvol_pos_bad, 186) \ - x(subvol_not_master_and_not_snapshot, 187) \ - x(subvol_to_missing_root, 188) \ - x(subvol_root_wrong_bi_subvol, 189) \ - x(bkey_in_missing_snapshot, 190) \ - x(inode_pos_inode_nonzero, 191) \ - x(inode_pos_blockdev_range, 192) \ - x(inode_unpack_error, 193) \ - x(inode_str_hash_invalid, 194) \ - x(inode_v3_fields_start_bad, 195) \ - x(inode_snapshot_mismatch, 196) \ - x(inode_unlinked_but_clean, 197) \ 
- x(inode_unlinked_but_nlink_nonzero, 198) \ - x(inode_checksum_type_invalid, 199) \ - x(inode_compression_type_invalid, 200) \ - x(inode_subvol_root_but_not_dir, 201) \ - x(inode_i_size_dirty_but_clean, 202) \ - x(inode_i_sectors_dirty_but_clean, 203) \ - x(inode_i_sectors_wrong, 204) \ - x(inode_dir_wrong_nlink, 205) \ - x(inode_dir_multiple_links, 206) \ - x(inode_multiple_links_but_nlink_0, 207) \ - x(inode_wrong_backpointer, 208) \ - x(inode_wrong_nlink, 209) \ - x(inode_unreachable, 210) \ - x(deleted_inode_but_clean, 211) \ - x(deleted_inode_missing, 212) \ - x(deleted_inode_is_dir, 213) \ - x(deleted_inode_not_unlinked, 214) \ - x(extent_overlapping, 215) \ - x(extent_in_missing_inode, 216) \ - x(extent_in_non_reg_inode, 217) \ - x(extent_past_end_of_inode, 218) \ - x(dirent_empty_name, 219) \ - x(dirent_val_too_big, 220) \ - x(dirent_name_too_long, 221) \ - x(dirent_name_embedded_nul, 222) \ - x(dirent_name_dot_or_dotdot, 223) \ - x(dirent_name_has_slash, 224) \ - x(dirent_d_type_wrong, 225) \ - x(dirent_d_parent_subvol_wrong, 226) \ - x(dirent_in_missing_dir_inode, 227) \ - x(dirent_in_non_dir_inode, 228) \ - x(dirent_to_missing_inode, 229) \ - x(dirent_to_missing_subvol, 230) \ - x(dirent_to_itself, 231) \ - x(quota_type_invalid, 232) \ - x(xattr_val_size_too_small, 233) \ - x(xattr_val_size_too_big, 234) \ - x(xattr_invalid_type, 235) \ - x(xattr_name_invalid_chars, 236) \ - x(xattr_in_missing_inode, 237) \ - x(root_subvol_missing, 238) \ - x(root_dir_missing, 239) \ - x(root_inode_not_dir, 240) \ - x(dir_loop, 241) \ - x(hash_table_key_duplicate, 242) \ - x(hash_table_key_wrong_offset, 243) - -enum bch_sb_error_id { -#define x(t, n) BCH_FSCK_ERR_##t = n, - BCH_SB_ERRS() -#undef x - BCH_SB_ERR_MAX -}; +extern const char * const bch2_sb_error_strs[]; extern const struct bch_sb_field_ops bch_sb_field_ops_errors; diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h index b1c099843a39..3504c2d09c29 100644 --- 
a/fs/bcachefs/sb-errors_types.h +++ b/fs/bcachefs/sb-errors_types.h @@ -4,6 +4,259 @@ #include "darray.h" +#define BCH_SB_ERRS() \ + x(clean_but_journal_not_empty, 0) \ + x(dirty_but_no_journal_entries, 1) \ + x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \ + x(sb_clean_journal_seq_mismatch, 3) \ + x(sb_clean_btree_root_mismatch, 4) \ + x(sb_clean_missing, 5) \ + x(jset_unsupported_version, 6) \ + x(jset_unknown_csum, 7) \ + x(jset_last_seq_newer_than_seq, 8) \ + x(jset_past_bucket_end, 9) \ + x(jset_seq_blacklisted, 10) \ + x(journal_entries_missing, 11) \ + x(journal_entry_replicas_not_marked, 12) \ + x(journal_entry_past_jset_end, 13) \ + x(journal_entry_replicas_data_mismatch, 14) \ + x(journal_entry_bkey_u64s_0, 15) \ + x(journal_entry_bkey_past_end, 16) \ + x(journal_entry_bkey_bad_format, 17) \ + x(journal_entry_bkey_invalid, 18) \ + x(journal_entry_btree_root_bad_size, 19) \ + x(journal_entry_blacklist_bad_size, 20) \ + x(journal_entry_blacklist_v2_bad_size, 21) \ + x(journal_entry_blacklist_v2_start_past_end, 22) \ + x(journal_entry_usage_bad_size, 23) \ + x(journal_entry_data_usage_bad_size, 24) \ + x(journal_entry_clock_bad_size, 25) \ + x(journal_entry_clock_bad_rw, 26) \ + x(journal_entry_dev_usage_bad_size, 27) \ + x(journal_entry_dev_usage_bad_dev, 28) \ + x(journal_entry_dev_usage_bad_pad, 29) \ + x(btree_node_unreadable, 30) \ + x(btree_node_fault_injected, 31) \ + x(btree_node_bad_magic, 32) \ + x(btree_node_bad_seq, 33) \ + x(btree_node_unsupported_version, 34) \ + x(btree_node_bset_older_than_sb_min, 35) \ + x(btree_node_bset_newer_than_sb, 36) \ + x(btree_node_data_missing, 37) \ + x(btree_node_bset_after_end, 38) \ + x(btree_node_replicas_sectors_written_mismatch, 39) \ + x(btree_node_replicas_data_mismatch, 40) \ + x(bset_unknown_csum, 41) \ + x(bset_bad_csum, 42) \ + x(bset_past_end_of_btree_node, 43) \ + x(bset_wrong_sector_offset, 44) \ + x(bset_empty, 45) \ + x(bset_bad_seq, 46) \ + x(bset_blacklisted_journal_seq, 47) \ + 
x(first_bset_blacklisted_journal_seq, 48) \ + x(btree_node_bad_btree, 49) \ + x(btree_node_bad_level, 50) \ + x(btree_node_bad_min_key, 51) \ + x(btree_node_bad_max_key, 52) \ + x(btree_node_bad_format, 53) \ + x(btree_node_bkey_past_bset_end, 54) \ + x(btree_node_bkey_bad_format, 55) \ + x(btree_node_bad_bkey, 56) \ + x(btree_node_bkey_out_of_order, 57) \ + x(btree_root_bkey_invalid, 58) \ + x(btree_root_read_error, 59) \ + x(btree_root_bad_min_key, 60) \ + x(btree_root_bad_max_key, 61) \ + x(btree_node_read_error, 62) \ + x(btree_node_topology_bad_min_key, 63) \ + x(btree_node_topology_bad_max_key, 64) \ + x(btree_node_topology_overwritten_by_prev_node, 65) \ + x(btree_node_topology_overwritten_by_next_node, 66) \ + x(btree_node_topology_interior_node_empty, 67) \ + x(fs_usage_hidden_wrong, 68) \ + x(fs_usage_btree_wrong, 69) \ + x(fs_usage_data_wrong, 70) \ + x(fs_usage_cached_wrong, 71) \ + x(fs_usage_reserved_wrong, 72) \ + x(fs_usage_persistent_reserved_wrong, 73) \ + x(fs_usage_nr_inodes_wrong, 74) \ + x(fs_usage_replicas_wrong, 75) \ + x(dev_usage_buckets_wrong, 76) \ + x(dev_usage_sectors_wrong, 77) \ + x(dev_usage_fragmented_wrong, 78) \ + x(dev_usage_buckets_ec_wrong, 79) \ + x(bkey_version_in_future, 80) \ + x(bkey_u64s_too_small, 81) \ + x(bkey_invalid_type_for_btree, 82) \ + x(bkey_extent_size_zero, 83) \ + x(bkey_extent_size_greater_than_offset, 84) \ + x(bkey_size_nonzero, 85) \ + x(bkey_snapshot_nonzero, 86) \ + x(bkey_snapshot_zero, 87) \ + x(bkey_at_pos_max, 88) \ + x(bkey_before_start_of_btree_node, 89) \ + x(bkey_after_end_of_btree_node, 90) \ + x(bkey_val_size_nonzero, 91) \ + x(bkey_val_size_too_small, 92) \ + x(alloc_v1_val_size_bad, 93) \ + x(alloc_v2_unpack_error, 94) \ + x(alloc_v3_unpack_error, 95) \ + x(alloc_v4_val_size_bad, 96) \ + x(alloc_v4_backpointers_start_bad, 97) \ + x(alloc_key_data_type_bad, 98) \ + x(alloc_key_empty_but_have_data, 99) \ + x(alloc_key_dirty_sectors_0, 100) \ + x(alloc_key_data_type_inconsistency, 101) \ + 
x(alloc_key_to_missing_dev_bucket, 102) \ + x(alloc_key_cached_inconsistency, 103) \ + x(alloc_key_cached_but_read_time_zero, 104) \ + x(alloc_key_to_missing_lru_entry, 105) \ + x(alloc_key_data_type_wrong, 106) \ + x(alloc_key_gen_wrong, 107) \ + x(alloc_key_dirty_sectors_wrong, 108) \ + x(alloc_key_cached_sectors_wrong, 109) \ + x(alloc_key_stripe_wrong, 110) \ + x(alloc_key_stripe_redundancy_wrong, 111) \ + x(bucket_sector_count_overflow, 112) \ + x(bucket_metadata_type_mismatch, 113) \ + x(need_discard_key_wrong, 114) \ + x(freespace_key_wrong, 115) \ + x(freespace_hole_missing, 116) \ + x(bucket_gens_val_size_bad, 117) \ + x(bucket_gens_key_wrong, 118) \ + x(bucket_gens_hole_wrong, 119) \ + x(bucket_gens_to_invalid_dev, 120) \ + x(bucket_gens_to_invalid_buckets, 121) \ + x(bucket_gens_nonzero_for_invalid_buckets, 122) \ + x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \ + x(need_discard_freespace_key_bad, 124) \ + x(backpointer_pos_wrong, 125) \ + x(backpointer_to_missing_device, 126) \ + x(backpointer_to_missing_alloc, 127) \ + x(backpointer_to_missing_ptr, 128) \ + x(lru_entry_at_time_0, 129) \ + x(lru_entry_to_invalid_bucket, 130) \ + x(lru_entry_bad, 131) \ + x(btree_ptr_val_too_big, 132) \ + x(btree_ptr_v2_val_too_big, 133) \ + x(btree_ptr_has_non_ptr, 134) \ + x(extent_ptrs_invalid_entry, 135) \ + x(extent_ptrs_no_ptrs, 136) \ + x(extent_ptrs_too_many_ptrs, 137) \ + x(extent_ptrs_redundant_crc, 138) \ + x(extent_ptrs_redundant_stripe, 139) \ + x(extent_ptrs_unwritten, 140) \ + x(extent_ptrs_written_and_unwritten, 141) \ + x(ptr_to_invalid_device, 142) \ + x(ptr_to_duplicate_device, 143) \ + x(ptr_after_last_bucket, 144) \ + x(ptr_before_first_bucket, 145) \ + x(ptr_spans_multiple_buckets, 146) \ + x(ptr_to_missing_backpointer, 147) \ + x(ptr_to_missing_alloc_key, 148) \ + x(ptr_to_missing_replicas_entry, 149) \ + x(ptr_to_missing_stripe, 150) \ + x(ptr_to_incorrect_stripe, 151) \ + x(ptr_gen_newer_than_bucket_gen, 152) \ + x(ptr_too_stale, 153) 
\ + x(stale_dirty_ptr, 154) \ + x(ptr_bucket_data_type_mismatch, 155) \ + x(ptr_cached_and_erasure_coded, 156) \ + x(ptr_crc_uncompressed_size_too_small, 157) \ + x(ptr_crc_csum_type_unknown, 158) \ + x(ptr_crc_compression_type_unknown, 159) \ + x(ptr_crc_redundant, 160) \ + x(ptr_crc_uncompressed_size_too_big, 161) \ + x(ptr_crc_nonce_mismatch, 162) \ + x(ptr_stripe_redundant, 163) \ + x(reservation_key_nr_replicas_invalid, 164) \ + x(reflink_v_refcount_wrong, 165) \ + x(reflink_p_to_missing_reflink_v, 166) \ + x(stripe_pos_bad, 167) \ + x(stripe_val_size_bad, 168) \ + x(stripe_sector_count_wrong, 169) \ + x(snapshot_tree_pos_bad, 170) \ + x(snapshot_tree_to_missing_snapshot, 171) \ + x(snapshot_tree_to_missing_subvol, 172) \ + x(snapshot_tree_to_wrong_subvol, 173) \ + x(snapshot_tree_to_snapshot_subvol, 174) \ + x(snapshot_pos_bad, 175) \ + x(snapshot_parent_bad, 176) \ + x(snapshot_children_not_normalized, 177) \ + x(snapshot_child_duplicate, 178) \ + x(snapshot_child_bad, 179) \ + x(snapshot_skiplist_not_normalized, 180) \ + x(snapshot_skiplist_bad, 181) \ + x(snapshot_should_not_have_subvol, 182) \ + x(snapshot_to_bad_snapshot_tree, 183) \ + x(snapshot_bad_depth, 184) \ + x(snapshot_bad_skiplist, 185) \ + x(subvol_pos_bad, 186) \ + x(subvol_not_master_and_not_snapshot, 187) \ + x(subvol_to_missing_root, 188) \ + x(subvol_root_wrong_bi_subvol, 189) \ + x(bkey_in_missing_snapshot, 190) \ + x(inode_pos_inode_nonzero, 191) \ + x(inode_pos_blockdev_range, 192) \ + x(inode_unpack_error, 193) \ + x(inode_str_hash_invalid, 194) \ + x(inode_v3_fields_start_bad, 195) \ + x(inode_snapshot_mismatch, 196) \ + x(inode_unlinked_but_clean, 197) \ + x(inode_unlinked_but_nlink_nonzero, 198) \ + x(inode_checksum_type_invalid, 199) \ + x(inode_compression_type_invalid, 200) \ + x(inode_subvol_root_but_not_dir, 201) \ + x(inode_i_size_dirty_but_clean, 202) \ + x(inode_i_sectors_dirty_but_clean, 203) \ + x(inode_i_sectors_wrong, 204) \ + x(inode_dir_wrong_nlink, 205) \ + 
x(inode_dir_multiple_links, 206) \ + x(inode_multiple_links_but_nlink_0, 207) \ + x(inode_wrong_backpointer, 208) \ + x(inode_wrong_nlink, 209) \ + x(inode_unreachable, 210) \ + x(deleted_inode_but_clean, 211) \ + x(deleted_inode_missing, 212) \ + x(deleted_inode_is_dir, 213) \ + x(deleted_inode_not_unlinked, 214) \ + x(extent_overlapping, 215) \ + x(extent_in_missing_inode, 216) \ + x(extent_in_non_reg_inode, 217) \ + x(extent_past_end_of_inode, 218) \ + x(dirent_empty_name, 219) \ + x(dirent_val_too_big, 220) \ + x(dirent_name_too_long, 221) \ + x(dirent_name_embedded_nul, 222) \ + x(dirent_name_dot_or_dotdot, 223) \ + x(dirent_name_has_slash, 224) \ + x(dirent_d_type_wrong, 225) \ + x(dirent_d_parent_subvol_wrong, 226) \ + x(dirent_in_missing_dir_inode, 227) \ + x(dirent_in_non_dir_inode, 228) \ + x(dirent_to_missing_inode, 229) \ + x(dirent_to_missing_subvol, 230) \ + x(dirent_to_itself, 231) \ + x(quota_type_invalid, 232) \ + x(xattr_val_size_too_small, 233) \ + x(xattr_val_size_too_big, 234) \ + x(xattr_invalid_type, 235) \ + x(xattr_name_invalid_chars, 236) \ + x(xattr_in_missing_inode, 237) \ + x(root_subvol_missing, 238) \ + x(root_dir_missing, 239) \ + x(root_inode_not_dir, 240) \ + x(dir_loop, 241) \ + x(hash_table_key_duplicate, 242) \ + x(hash_table_key_wrong_offset, 243) + +enum bch_sb_error_id { +#define x(t, n) BCH_FSCK_ERR_##t = n, + BCH_SB_ERRS() +#undef x + BCH_SB_ERR_MAX +}; + struct bch_sb_error_entry_cpu { u64 id:16, nr:48; diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index fccd25aa3242..22b34a8e4d6e 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -146,6 +146,24 @@ int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol, return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s); } +int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) +{ + struct bch_subvolume s; + int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s); + if (ret) + return 
ret; + + if (BCH_SUBVOLUME_RO(&s)) + return -EROFS; + return 0; +} + +int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol) +{ + return bch2_trans_do(c, NULL, NULL, 0, + bch2_subvol_is_ro_trans(trans, subvol)); +} + int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, struct bch_subvolume *subvol) { diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index a1003d30ab0a..a6f56f66e27c 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -23,6 +23,9 @@ int bch2_subvolume_get(struct btree_trans *, unsigned, bool, int, struct bch_subvolume *); int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); +int bch2_subvol_is_ro_trans(struct btree_trans *, u32); +int bch2_subvol_is_ro(struct bch_fs *, u32); + int bch2_delete_dead_snapshots(struct bch_fs *); void bch2_delete_dead_snapshots_async(struct bch_fs *); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index f3e12f7979d5..4c98d8cc2a79 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -13,6 +13,7 @@ #include "replicas.h" #include "quota.h" #include "sb-clean.h" +#include "sb-downgrade.h" #include "sb-errors.h" #include "sb-members.h" #include "super-io.h" @@ -264,6 +265,17 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, return f; } +struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *sb, + enum bch_sb_field_type type, + unsigned u64s) +{ + struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type); + + if (!f || le32_to_cpu(f->u64s) < u64s) + f = bch2_sb_field_resize_id(sb, type, u64s); + return f; +} + /* Superblock validate: */ static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out) @@ -484,6 +496,21 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out, /* device open: */ +static unsigned long le_ulong_to_cpu(unsigned long v) +{ + return sizeof(unsigned long) == 8 + ? 
le64_to_cpu(v) + : le32_to_cpu(v); +} + +static void le_bitvector_to_cpu(unsigned long *dst, unsigned long *src, unsigned nr) +{ + BUG_ON(nr & (BITS_PER_TYPE(long) - 1)); + + for (unsigned i = 0; i < BITS_TO_LONGS(nr); i++) + dst[i] = le_ulong_to_cpu(src[i]); +} + static void bch2_sb_update(struct bch_fs *c) { struct bch_sb *src = c->disk_sb.sb; @@ -512,8 +539,15 @@ static void bch2_sb_update(struct bch_fs *c) c->sb.features = le64_to_cpu(src->features[0]); c->sb.compat = le64_to_cpu(src->compat[0]); + memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent)); + + struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext); + if (ext) + le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent, + sizeof(c->sb.errors_silent) * 8); + for_each_member_device(ca, c, i) { - struct bch_member m = bch2_sb_member_get(src, i); + struct bch_member m = bch2_sb_member_get(src, ca->dev_idx); ca->mi = bch2_mi_to_cpu(&m); } } @@ -906,6 +940,7 @@ int bch2_write_super(struct bch_fs *c) bch2_sb_members_from_cpu(c); bch2_sb_members_cpy_v2_v1(&c->disk_sb); bch2_sb_errors_from_cpu(c); + bch2_sb_downgrade_update(c); for_each_online_member(ca, c, i) bch2_sb_from_fs(c, ca); @@ -1029,8 +1064,10 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) } /* Downgrade if superblock is at a higher version than currently supported: */ -void bch2_sb_maybe_downgrade(struct bch_fs *c) +bool bch2_check_version_downgrade(struct bch_fs *c) { + bool ret = bcachefs_metadata_version_current < c->sb.version; + lockdep_assert_held(&c->sb_lock); /* @@ -1044,16 +1081,61 @@ void bch2_sb_maybe_downgrade(struct bch_fs *c) if (c->sb.version_min > bcachefs_metadata_version_current) c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current); c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1); + return ret; } void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) { lockdep_assert_held(&c->sb_lock); + if (BCH_VERSION_MAJOR(new_version) > + 
BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) + bch2_sb_field_resize(&c->disk_sb, downgrade, 0); + c->disk_sb.sb->version = cpu_to_le16(new_version); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); } +static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, + struct printbuf *err) +{ + if (vstruct_bytes(f) < 88) { + prt_printf(err, "field too small (%zu < %u)", vstruct_bytes(f), 88); + return -BCH_ERR_invalid_sb_ext; + } + + return 0; +} + +static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb, + struct bch_sb_field *f) +{ + struct bch_sb_field_ext *e = field_to_type(f, ext); + + prt_printf(out, "Recovery passes required:"); + prt_tab(out); + prt_bitflags(out, bch2_recovery_passes, + bch2_recovery_passes_from_stable(le64_to_cpu(e->recovery_passes_required[0]))); + prt_newline(out); + + unsigned long *errors_silent = kmalloc(sizeof(e->errors_silent), GFP_KERNEL); + if (errors_silent) { + le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8); + + prt_printf(out, "Errors to silently fix:"); + prt_tab(out); + prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent, sizeof(e->errors_silent) * 8); + prt_newline(out); + + kfree(errors_silent); + } +} + +static const struct bch_sb_field_ops bch_sb_field_ops_ext = { + .validate = bch2_sb_ext_validate, + .to_text = bch2_sb_ext_to_text, +}; + static const struct bch_sb_field_ops *bch2_sb_field_ops[] = { #define x(f, nr) \ [BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f, diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index f5abd102bff7..e41e5de531a0 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -40,6 +40,16 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *, #define bch2_sb_field_resize(_sb, _name, _u64s) \ field_to_type(bch2_sb_field_resize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name) +struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *, + enum 
bch_sb_field_type, unsigned); +#define bch2_sb_field_get_minsize(_sb, _name, _u64s) \ + field_to_type(bch2_sb_field_get_minsize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name) + +#define bch2_sb_field_nr_entries(_f) \ + (_f ? ((bch2_sb_field_bytes(&_f->field) - sizeof(*_f)) / \ + sizeof(_f->entries[0])) \ + : 0) + void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type); extern const char * const bch2_sb_fields[]; @@ -83,7 +93,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat) __bch2_check_set_feature(c, feat); } -void bch2_sb_maybe_downgrade(struct bch_fs *); +bool bch2_check_version_downgrade(struct bch_fs *); void bch2_sb_upgrade(struct bch_fs *, unsigned); void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *, diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 2984b57b2958..b701f7fe0784 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -243,6 +243,7 @@ do { \ #define prt_units_s64(...) bch2_prt_units_s64(__VA_ARGS__) #define prt_string_option(...) bch2_prt_string_option(__VA_ARGS__) #define prt_bitflags(...) bch2_prt_bitflags(__VA_ARGS__) +#define prt_bitflags_vector(...) bch2_prt_bitflags_vector(__VA_ARGS__) void bch2_pr_time_units(struct printbuf *, u64); void bch2_prt_datetime(struct printbuf *, time64_t); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 79d982674c18..5a1858fb9879 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -176,7 +176,8 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum, struct btree_iter inode_iter = { NULL }; int ret; - ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); + ret = bch2_subvol_is_ro_trans(trans, inum.subvol) ?: + bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT); if (ret) return ret;