Merge tag 'bcachefs-2024-01-01' of https://evilpiepirate.org/git/bcachefs

Pull bcachefs from Kent Overstreet:
 "More bcachefs bugfixes for 6.7, and forwards compatibility work:

   - fix for a nasty extents + snapshot interaction, reported when
     reflink of a snapshotted file wouldn't complete but turned out to
     be a more general bug

   - fix for an invalid free in dio write path when iov vector was
     longer than our inline vector

   - fix for a buffer overflow in the nocow write path -
     BCH_REPLICAS_MAX doesn't actually limit the number of pointers in
     an extent when cached pointers are included

   - RO snapshots are actually RO now

   - And, a new superblock section to avoid future breakage when the
     disk space accounting rewrite rolls out: the new superblock section
     describes versions that need work to downgrade, where the work
     required is a list of recovery passes and errors to silently fix"
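
Conceptually, each downgrade entry pairs a version with the recovery passes an
older kernel must run and the fsck errors it should silently fix. The short
standalone sketch below models how that information would be consumed on a
downgrade; it is illustrative only (invented version numbers and a plain C
model, not bcachefs code — the real logic is bch2_sb_set_downgrade() in the
diff further down).

/* Standalone model of consuming a downgrade table entry; not kernel code. */
#include <stdio.h>
#include <stdint.h>

struct downgrade_entry {
        uint16_t        version;                /* version that introduced the new format */
        uint64_t        recovery_passes;        /* passes an older kernel must run */
        uint16_t        nr_errors;
        const uint16_t  *errors;                /* fsck error IDs to silently fix */
};

/* Hypothetical: downgrading from "version 2" needs pass bit 3, silences errors 10 and 11 */
static const uint16_t v2_errors[] = { 10, 11 };
static const struct downgrade_entry table[] = {
        { .version = 2, .recovery_passes = 1ULL << 3, .nr_errors = 2, .errors = v2_errors },
};

int main(void)
{
        unsigned running = 1, on_disk = 2;      /* kernel understands 1, superblock is at 2 */
        uint64_t passes = 0;

        for (unsigned i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
                if (running < table[i].version && table[i].version <= on_disk) {
                        passes |= table[i].recovery_passes;
                        for (unsigned j = 0; j < table[i].nr_errors; j++)
                                printf("silently fixing error id %u\n", table[i].errors[j]);
                }
        }
        printf("recovery passes required: %#llx\n", (unsigned long long) passes);
        return 0;
}

In the actual implementation the entries live in the new bch_sb_field_downgrade
superblock section, and the collected passes and errors are copied into
bch_sb_field_ext so the downgraded kernel acts on them at mount time.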

* tag 'bcachefs-2024-01-01' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: make RO snapshots actually RO
  bcachefs: bch_sb_field_downgrade
  bcachefs: bch_sb.recovery_passes_required
  bcachefs: Add persistent identifiers for recovery passes
  bcachefs: prt_bitflags_vector()
  bcachefs: move BCH_SB_ERRS() to sb-errors_types.h
  bcachefs: fix buffer overflow in nocow write path
  bcachefs: DARRAY_PREALLOCATED()
  bcachefs: Switch darray to kvmalloc()
  bcachefs: Factor out darray resize slowpath
  bcachefs: fix setting version_upgrade_complete
  bcachefs: fix invalid free in dio write path
  bcachefs: Fix extents iteration + snapshots interaction
Linus Torvalds 2024-01-03 11:35:48 -08:00
commit 981d04137a
30 changed files with 978 additions and 424 deletions


@ -28,6 +28,7 @@ bcachefs-y := \
clock.o \
compress.o \
counters.o \
darray.o \
debug.o \
dirent.o \
disk_groups.o \
@ -70,6 +71,7 @@ bcachefs-y := \
reflink.o \
replicas.o \
sb-clean.o \
sb-downgrade.o \
sb-errors.o \
sb-members.o \
siphash.o \


@ -366,7 +366,8 @@ int bch2_set_acl(struct mnt_idmap *idmap,
bch2_trans_begin(trans);
acl = _acl;
ret = bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
ret = bch2_subvol_is_ro_trans(trans, inode->ei_subvol) ?:
bch2_inode_peek(trans, &inode_iter, &inode_u, inode_inum(inode),
BTREE_ITER_INTENT);
if (ret)
goto btree_err;


@ -737,6 +737,7 @@ struct bch_fs {
unsigned nsec_per_time_unit;
u64 features;
u64 compat;
unsigned long errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
} sb;


@ -1207,19 +1207,21 @@ struct bch_sb_field {
};
#define BCH_SB_FIELDS() \
x(journal, 0) \
x(members_v1, 1) \
x(crypt, 2) \
x(replicas_v0, 3) \
x(quota, 4) \
x(disk_groups, 5) \
x(clean, 6) \
x(replicas, 7) \
x(journal_seq_blacklist, 8) \
x(journal_v2, 9) \
x(counters, 10) \
x(members_v2, 11) \
x(errors, 12)
x(journal, 0) \
x(members_v1, 1) \
x(crypt, 2) \
x(replicas_v0, 3) \
x(quota, 4) \
x(disk_groups, 5) \
x(clean, 6) \
x(replicas, 7) \
x(journal_seq_blacklist, 8) \
x(journal_v2, 9) \
x(counters, 10) \
x(members_v2, 11) \
x(errors, 12) \
x(ext, 13) \
x(downgrade, 14)
enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr,
@ -1631,6 +1633,24 @@ struct bch_sb_field_errors {
LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);
LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
struct bch_sb_field_ext {
struct bch_sb_field field;
__le64 recovery_passes_required[2];
__le64 errors_silent[8];
};
struct bch_sb_field_downgrade_entry {
__le16 version;
__le64 recovery_passes[2];
__le16 nr_errors;
__le16 errors[] __counted_by(nr_errors);
} __packed __aligned(2);
struct bch_sb_field_downgrade {
struct bch_sb_field field;
struct bch_sb_field_downgrade_entry entries[];
};
/* Superblock: */
/*
@ -1644,6 +1664,11 @@ LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
#define RECOVERY_PASS_ALL_FSCK (1ULL << 63)
/*
* field 1: version name
* field 2: BCH_VERSION(major, minor)
* field 3: recovery passes required on upgrade
*/
#define BCH_METADATA_VERSIONS() \
x(bkey_renumber, BCH_VERSION(0, 10), \
RECOVERY_PASS_ALL_FSCK) \


@ -2085,18 +2085,16 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
goto out_no_locked;
/*
* iter->pos should be monotonically increasing, and always be
* equal to the key we just returned - except extents can
* straddle iter->pos:
* We need to check against @end before FILTER_SNAPSHOTS because
* if we get to a different inode than requested we might be
* seeing keys for a different snapshot tree that will all be
* filtered out.
*
* But we can't do the full check here, because bkey_start_pos()
* isn't monotonically increasing before FILTER_SNAPSHOTS, and
* that's what we check against in extents mode:
*/
if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
iter_pos = k.k->p;
else
iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
? bkey_gt(iter_pos, end)
: bkey_ge(iter_pos, end)))
if (k.k->p.inode > end.inode)
goto end;
if (iter->update_path &&
@ -2155,6 +2153,21 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
continue;
}
/*
* iter->pos should be monotonically increasing, and always be
* equal to the key we just returned - except extents can
* straddle iter->pos:
*/
if (!(iter->flags & BTREE_ITER_IS_EXTENTS))
iter_pos = k.k->p;
else
iter_pos = bkey_max(iter->pos, bkey_start_pos(k.k));
if (unlikely(!(iter->flags & BTREE_ITER_IS_EXTENTS)
? bkey_gt(iter_pos, end)
: bkey_ge(iter_pos, end)))
goto end;
break;
}

fs/bcachefs/darray.c (new file, +24)

@ -0,0 +1,24 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/log2.h>
#include <linux/slab.h>
#include "darray.h"
int __bch2_darray_resize(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp)
{
if (new_size > d->size) {
new_size = roundup_pow_of_two(new_size);
void *data = kvmalloc_array(new_size, element_size, gfp);
if (!data)
return -ENOMEM;
memcpy(data, d->data, d->size * element_size);
if (d->data != d->preallocated)
kvfree(d->data);
d->data = data;
d->size = new_size;
}
return 0;
}


@ -8,39 +8,48 @@
* Inspired by CCAN's darray
*/
#include "util.h"
#include <linux/slab.h>
#define DARRAY(type) \
#define DARRAY_PREALLOCATED(_type, _nr) \
struct { \
size_t nr, size; \
type *data; \
_type *data; \
_type preallocated[_nr]; \
}
typedef DARRAY(void) darray_void;
#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)
static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more, gfp_t gfp)
typedef DARRAY(char) darray_char;
int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t);
static inline int __darray_resize(darray_char *d, size_t element_size,
size_t new_size, gfp_t gfp)
{
if (d->nr + more > d->size) {
size_t new_size = roundup_pow_of_two(d->nr + more);
void *data = krealloc_array(d->data, new_size, t_size, gfp);
return unlikely(new_size > d->size)
? __bch2_darray_resize(d, element_size, new_size, gfp)
: 0;
}
if (!data)
return -ENOMEM;
#define darray_resize_gfp(_d, _new_size, _gfp) \
unlikely(__darray_resize((darray_char *) (_d), sizeof((_d)->data[0]), (_new_size), _gfp))
d->data = data;
d->size = new_size;
}
#define darray_resize(_d, _new_size) \
darray_resize_gfp(_d, _new_size, GFP_KERNEL)
return 0;
static inline int __darray_make_room(darray_char *d, size_t t_size, size_t more, gfp_t gfp)
{
return __darray_resize(d, t_size, d->nr + more, gfp);
}
#define darray_make_room_gfp(_d, _more, _gfp) \
__darray_make_room((darray_void *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
__darray_make_room((darray_char *) (_d), sizeof((_d)->data[0]), (_more), _gfp)
#define darray_make_room(_d, _more) \
darray_make_room_gfp(_d, _more, GFP_KERNEL)
#define darray_room(_d) ((_d).size - (_d).nr)
#define darray_top(_d) ((_d).data[(_d).nr])
#define darray_push_gfp(_d, _item, _gfp) \
@ -80,13 +89,16 @@ static inline int __darray_make_room(darray_void *d, size_t t_size, size_t more,
#define darray_init(_d) \
do { \
(_d)->data = NULL; \
(_d)->nr = (_d)->size = 0; \
(_d)->nr = 0; \
(_d)->size = ARRAY_SIZE((_d)->preallocated); \
(_d)->data = (_d)->size ? (_d)->preallocated : NULL; \
} while (0)
#define darray_exit(_d) \
do { \
kfree((_d)->data); \
if (!ARRAY_SIZE((_d)->preallocated) || \
(_d)->data != (_d)->preallocated) \
kvfree((_d)->data); \
darray_init(_d); \
} while (0)
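
The macros above give these dynamic arrays a small inline buffer, so the common
case never touches the allocator and only growth past the preallocated size
falls back to the heap (kvmalloc_array() in the kernel version). A minimal
standalone illustration of that pattern follows; the macro and variable names
here are invented, and the real interface is darray_init()/darray_push()/
darray_exit() shown in this hunk.

/* Standalone illustration of the preallocated-darray pattern; not bcachefs code. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PREALLOC_ARRAY(_type, _nr)              \
struct {                                        \
        size_t  nr, size;                       \
        _type   *data;                          \
        _type   preallocated[_nr];              \
}

int main(void)
{
        PREALLOC_ARRAY(int, 3) d;

        /* darray_init(): point at the inline buffer, no allocation yet */
        d.nr = 0;
        d.size = 3;
        d.data = d.preallocated;

        for (int v = 0; v < 5; v++) {
                if (d.nr == d.size) {
                        /* resize slowpath: spill to the heap, keep the old contents */
                        size_t new_size = d.size * 2;
                        int *p = malloc(new_size * sizeof(*p));

                        if (!p)
                                return 1;
                        memcpy(p, d.data, d.nr * sizeof(*p));
                        if (d.data != d.preallocated)
                                free(d.data);
                        d.data = p;
                        d.size = new_size;
                }
                d.data[d.nr++] = v;     /* darray_push() equivalent */
        }

        printf("%zu elements, spilled to heap: %s\n",
               d.nr, d.data != d.preallocated ? "yes" : "no");

        /* darray_exit(): only free what was actually allocated */
        if (d.data != d.preallocated)
                free(d.data);
        return 0;
}

The nocow write path below uses exactly this shape
(DARRAY_PREALLOCATED(struct bucket_to_lock, 3)), so a few pointers per extent
need no allocation while extents with many cached pointers — the buffer
overflow fixed in this series — can grow safely.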


@ -95,6 +95,7 @@
x(ENOSPC, ENOSPC_sb_members) \
x(ENOSPC, ENOSPC_sb_members_v2) \
x(ENOSPC, ENOSPC_sb_crypt) \
x(ENOSPC, ENOSPC_sb_downgrade) \
x(ENOSPC, ENOSPC_btree_slot) \
x(ENOSPC, ENOSPC_snapshot_tree) \
x(ENOENT, ENOENT_bkey_type_mismatch) \
@ -218,6 +219,8 @@
x(BCH_ERR_invalid_sb, invalid_sb_quota) \
x(BCH_ERR_invalid_sb, invalid_sb_errors) \
x(BCH_ERR_invalid_sb, invalid_sb_opt_compression) \
x(BCH_ERR_invalid_sb, invalid_sb_ext) \
x(BCH_ERR_invalid_sb, invalid_sb_downgrade) \
x(BCH_ERR_invalid, invalid_bkey) \
x(BCH_ERR_operation_blocked, nocow_lock_blocked) \
x(EIO, btree_node_read_err) \


@ -152,6 +152,9 @@ int bch2_fsck_err(struct bch_fs *c,
struct printbuf buf = PRINTBUF, *out = &buf;
int ret = -BCH_ERR_fsck_ignore;
if (test_bit(err, c->sb.errors_silent))
return -BCH_ERR_fsck_fix;
bch2_sb_error_count(c, err);
va_start(args, fmt);


@ -216,11 +216,11 @@ struct dio_write {
struct address_space *mapping;
struct bch_inode_info *inode;
struct mm_struct *mm;
const struct iovec *iov;
unsigned loop:1,
extending:1,
sync:1,
flush:1,
free_iov:1;
flush:1;
struct quota_res quota_res;
u64 written;
@ -312,12 +312,10 @@ static noinline int bch2_dio_write_copy_iov(struct dio_write *dio)
return -1;
if (dio->iter.nr_segs > ARRAY_SIZE(dio->inline_vecs)) {
iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
dio->iov = iov = kmalloc_array(dio->iter.nr_segs, sizeof(*iov),
GFP_KERNEL);
if (unlikely(!iov))
return -ENOMEM;
dio->free_iov = true;
}
memcpy(iov, dio->iter.__iov, dio->iter.nr_segs * sizeof(*iov));
@ -381,8 +379,7 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
bch2_pagecache_block_put(inode);
if (dio->free_iov)
kfree(dio->iter.__iov);
kfree(dio->iov);
ret = dio->op.error ?: ((long) dio->written << 9);
bio_put(&dio->op.wbio.bio);
@ -626,11 +623,11 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
dio->mapping = mapping;
dio->inode = inode;
dio->mm = current->mm;
dio->iov = NULL;
dio->loop = false;
dio->extending = extending;
dio->sync = is_sync_kiocb(req) || extending;
dio->flush = iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
dio->free_iov = false;
dio->quota_res.sectors = 0;
dio->written = 0;
dio->iter = *iter;


@ -100,7 +100,8 @@ static int bch2_ioc_setflags(struct bch_fs *c,
}
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
bch2_write_inode(c, inode, bch2_inode_flags_set, &s,
ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock);
@ -183,13 +184,10 @@ static int bch2_ioc_fssetxattr(struct bch_fs *c,
}
mutex_lock(&inode->ei_update_lock);
ret = bch2_set_projid(c, inode, fa.fsx_projid);
if (ret)
goto err_unlock;
ret = bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
bch2_set_projid(c, inode, fa.fsx_projid) ?:
bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,
ATTR_CTIME);
err_unlock:
mutex_unlock(&inode->ei_update_lock);
err:
inode_unlock(&inode->v);


@ -258,7 +258,8 @@ __bch2_create(struct mnt_idmap *idmap,
retry:
bch2_trans_begin(trans);
ret = bch2_create_trans(trans,
ret = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?:
bch2_create_trans(trans,
inode_inum(dir), &dir_u, &inode_u,
!(flags & BCH_CREATE_TMPFILE)
? &dentry->d_name : NULL,
@ -430,7 +431,9 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir,
lockdep_assert_held(&inode->v.i_rwsem);
ret = __bch2_link(c, inode, dir, dentry);
ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:
bch2_subvol_is_ro(c, inode->ei_subvol) ?:
__bch2_link(c, inode, dir, dentry);
if (unlikely(ret))
return ret;
@ -481,7 +484,11 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
{
return __bch2_unlink(vdir, dentry, false);
struct bch_inode_info *dir= to_bch_ei(vdir);
struct bch_fs *c = dir->v.i_sb->s_fs_info;
return bch2_subvol_is_ro(c, dir->ei_subvol) ?:
__bch2_unlink(vdir, dentry, false);
}
static int bch2_symlink(struct mnt_idmap *idmap,
@ -562,6 +569,11 @@ static int bch2_rename2(struct mnt_idmap *idmap,
src_inode,
dst_inode);
ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?:
bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol);
if (ret)
goto err;
if (inode_attr_changing(dst_dir, src_inode, Inode_opt_project)) {
ret = bch2_fs_quota_transfer(c, src_inode,
dst_dir->ei_qid,
@ -783,11 +795,13 @@ static int bch2_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
int ret;
lockdep_assert_held(&inode->v.i_rwsem);
ret = setattr_prepare(idmap, dentry, iattr);
ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:
setattr_prepare(idmap, dentry, iattr);
if (ret)
return ret;
@ -1010,12 +1024,26 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
return bch2_err_class(ret);
}
static int bch2_open(struct inode *vinode, struct file *file)
{
if (file->f_flags & (O_WRONLY|O_RDWR)) {
struct bch_inode_info *inode = to_bch_ei(vinode);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
int ret = bch2_subvol_is_ro(c, inode->ei_subvol);
if (ret)
return ret;
}
return generic_file_open(vinode, file);
}
static const struct file_operations bch_file_operations = {
.open = bch2_open,
.llseek = bch2_llseek,
.read_iter = bch2_read_iter,
.write_iter = bch2_write_iter,
.mmap = bch2_mmap,
.open = generic_file_open,
.fsync = bch2_fsync,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,


@ -1216,6 +1216,12 @@ static CLOSURE_CALLBACK(bch2_nocow_write_done)
bch2_write_done(cl);
}
struct bucket_to_lock {
struct bpos b;
unsigned gen;
struct nocow_lock_bucket *l;
};
static void bch2_nocow_write(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
@ -1224,18 +1230,16 @@ static void bch2_nocow_write(struct bch_write_op *op)
struct bkey_s_c k;
struct bkey_ptrs_c ptrs;
const struct bch_extent_ptr *ptr;
struct {
struct bpos b;
unsigned gen;
struct nocow_lock_bucket *l;
} buckets[BCH_REPLICAS_MAX];
unsigned nr_buckets = 0;
DARRAY_PREALLOCATED(struct bucket_to_lock, 3) buckets;
struct bucket_to_lock *i;
u32 snapshot;
int ret, i;
struct bucket_to_lock *stale_at;
int ret;
if (op->flags & BCH_WRITE_MOVE)
return;
darray_init(&buckets);
trans = bch2_trans_get(c);
retry:
bch2_trans_begin(trans);
@ -1250,7 +1254,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
while (1) {
struct bio *bio = &op->wbio.bio;
nr_buckets = 0;
buckets.nr = 0;
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
@ -1263,26 +1267,26 @@ static void bch2_nocow_write(struct bch_write_op *op)
break;
if (bch2_keylist_realloc(&op->insert_keys,
op->inline_keys,
ARRAY_SIZE(op->inline_keys),
k.k->u64s))
op->inline_keys,
ARRAY_SIZE(op->inline_keys),
k.k->u64s))
break;
/* Get iorefs before dropping btree locks: */
ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr(ptrs, ptr) {
buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr);
buckets[nr_buckets].gen = ptr->gen;
buckets[nr_buckets].l =
bucket_nocow_lock(&c->nocow_locks,
bucket_to_u64(buckets[nr_buckets].b));
prefetch(buckets[nr_buckets].l);
struct bpos b = PTR_BUCKET_POS(c, ptr);
struct nocow_lock_bucket *l =
bucket_nocow_lock(&c->nocow_locks, bucket_to_u64(b));
prefetch(l);
if (unlikely(!bch2_dev_get_ioref(bch_dev_bkey_exists(c, ptr->dev), WRITE)))
goto err_get_ioref;
nr_buckets++;
/* XXX allocating memory with btree locks held - rare */
darray_push_gfp(&buckets, ((struct bucket_to_lock) {
.b = b, .gen = ptr->gen, .l = l,
}), GFP_KERNEL|__GFP_NOFAIL);
if (ptr->unwritten)
op->flags |= BCH_WRITE_CONVERT_UNWRITTEN;
@ -1296,21 +1300,21 @@ static void bch2_nocow_write(struct bch_write_op *op)
if (op->flags & BCH_WRITE_CONVERT_UNWRITTEN)
bch2_cut_back(POS(op->pos.inode, op->pos.offset + bio_sectors(bio)), op->insert_keys.top);
for (i = 0; i < nr_buckets; i++) {
struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode);
struct nocow_lock_bucket *l = buckets[i].l;
bool stale;
darray_for_each(buckets, i) {
struct bch_dev *ca = bch_dev_bkey_exists(c, i->b.inode);
__bch2_bucket_nocow_lock(&c->nocow_locks, l,
bucket_to_u64(buckets[i].b),
__bch2_bucket_nocow_lock(&c->nocow_locks, i->l,
bucket_to_u64(i->b),
BUCKET_NOCOW_LOCK_UPDATE);
rcu_read_lock();
stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen);
bool stale = gen_after(*bucket_gen(ca, i->b.offset), i->gen);
rcu_read_unlock();
if (unlikely(stale))
if (unlikely(stale)) {
stale_at = i;
goto err_bucket_stale;
}
}
bio = &op->wbio.bio;
@ -1346,15 +1350,14 @@ static void bch2_nocow_write(struct bch_write_op *op)
if (ret) {
bch_err_inum_offset_ratelimited(c,
op->pos.inode,
op->pos.offset << 9,
"%s: btree lookup error %s",
__func__, bch2_err_str(ret));
op->pos.inode, op->pos.offset << 9,
"%s: btree lookup error %s", __func__, bch2_err_str(ret));
op->error = ret;
op->flags |= BCH_WRITE_DONE;
}
bch2_trans_put(trans);
darray_exit(&buckets);
/* fallback to cow write path? */
if (!(op->flags & BCH_WRITE_DONE)) {
@ -1374,24 +1377,21 @@ static void bch2_nocow_write(struct bch_write_op *op)
}
return;
err_get_ioref:
for (i = 0; i < nr_buckets; i++)
percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
darray_for_each(buckets, i)
percpu_ref_put(&bch_dev_bkey_exists(c, i->b.inode)->io_ref);
/* Fall back to COW path: */
goto out;
err_bucket_stale:
while (i >= 0) {
bch2_bucket_nocow_unlock(&c->nocow_locks,
buckets[i].b,
BUCKET_NOCOW_LOCK_UPDATE);
--i;
darray_for_each(buckets, i) {
bch2_bucket_nocow_unlock(&c->nocow_locks, i->b, BUCKET_NOCOW_LOCK_UPDATE);
if (i == stale_at)
break;
}
for (i = 0; i < nr_buckets; i++)
percpu_ref_put(&bch_dev_bkey_exists(c, buckets[i].b.inode)->io_ref);
/* We can retry this: */
ret = -BCH_ERR_transaction_restart;
goto out;
goto err_get_ioref;
}
static void __bch2_write(struct bch_write_op *op)


@ -1,6 +1,7 @@
// SPDX-License-Identifier: LGPL-2.1+
/* Copyright (C) 2022 Kent Overstreet */
#include <linux/bitmap.h>
#include <linux/err.h>
#include <linux/export.h>
#include <linux/kernel.h>
@ -423,3 +424,24 @@ void bch2_prt_bitflags(struct printbuf *out,
flags ^= BIT_ULL(bit);
}
}
void bch2_prt_bitflags_vector(struct printbuf *out,
const char * const list[],
unsigned long *v, unsigned nr)
{
bool first = true;
unsigned i;
for (i = 0; i < nr; i++)
if (!list[i]) {
nr = i - 1;
break;
}
for_each_set_bit(i, v, nr) {
if (!first)
bch2_prt_printf(out, ",");
first = false;
bch2_prt_printf(out, "%s", list[i]);
}
}


@ -124,6 +124,8 @@ void bch2_prt_units_u64(struct printbuf *, u64);
void bch2_prt_units_s64(struct printbuf *, s64);
void bch2_prt_string_option(struct printbuf *, const char * const[], size_t);
void bch2_prt_bitflags(struct printbuf *, const char * const[], u64);
void bch2_prt_bitflags_vector(struct printbuf *, const char * const[],
unsigned long *, unsigned);
/* Initializer for a heap allocated printbuf: */
#define PRINTBUF ((struct printbuf) { .heap_allocated = true })


@ -27,6 +27,7 @@
#include "recovery.h"
#include "replicas.h"
#include "sb-clean.h"
#include "sb-downgrade.h"
#include "snapshot.h"
#include "subvolume.h"
#include "super-io.h"
@ -481,7 +482,7 @@ static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
}
const char * const bch2_recovery_passes[] = {
#define x(_fn, _when) #_fn,
#define x(_fn, ...) #_fn,
BCH_RECOVERY_PASSES()
#undef x
NULL
@ -504,18 +505,47 @@ struct recovery_pass_fn {
};
static struct recovery_pass_fn recovery_pass_fns[] = {
#define x(_fn, _when) { .fn = bch2_##_fn, .when = _when },
#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when },
BCH_RECOVERY_PASSES()
#undef x
};
static void check_version_upgrade(struct bch_fs *c)
u64 bch2_recovery_passes_to_stable(u64 v)
{
static const u8 map[] = {
#define x(n, id, ...) [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
BCH_RECOVERY_PASSES()
#undef x
};
u64 ret = 0;
for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
if (v & BIT_ULL(i))
ret |= BIT_ULL(map[i]);
return ret;
}
u64 bch2_recovery_passes_from_stable(u64 v)
{
static const u8 map[] = {
#define x(n, id, ...) [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
BCH_RECOVERY_PASSES()
#undef x
};
u64 ret = 0;
for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
if (v & BIT_ULL(i))
ret |= BIT_ULL(map[i]);
return ret;
}
static bool check_version_upgrade(struct bch_fs *c)
{
unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version);
unsigned latest_version = bcachefs_metadata_version_current;
unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
unsigned new_version = 0;
u64 recovery_passes;
if (old_version < bcachefs_metadata_required_upgrade_below) {
if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
@ -559,7 +589,7 @@ static void check_version_upgrade(struct bch_fs *c)
bch2_version_to_text(&buf, new_version);
prt_newline(&buf);
recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version);
u64 recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version);
if (recovery_passes) {
if ((recovery_passes & RECOVERY_PASS_ALL_FSCK) == RECOVERY_PASS_ALL_FSCK)
prt_str(&buf, "fsck required");
@ -574,12 +604,13 @@ static void check_version_upgrade(struct bch_fs *c)
bch_info(c, "%s", buf.buf);
mutex_lock(&c->sb_lock);
bch2_sb_upgrade(c, new_version);
mutex_unlock(&c->sb_lock);
printbuf_exit(&buf);
return true;
}
return false;
}
u64 bch2_fsck_recovery_passes(void)
@ -654,7 +685,6 @@ int bch2_fs_recovery(struct bch_fs *c)
struct bch_sb_field_clean *clean = NULL;
struct jset *last_journal_entry = NULL;
u64 last_seq = 0, blacklist_seq, journal_seq;
bool write_sb = false;
int ret = 0;
if (c->sb.clean) {
@ -682,15 +712,73 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
if (c->opts.fsck || !(c->opts.nochanges && c->opts.norecovery))
check_version_upgrade(c);
if (c->opts.fsck && c->opts.norecovery) {
bch_err(c, "cannot select both norecovery and fsck");
ret = -EINVAL;
goto err;
}
if (!(c->opts.nochanges && c->opts.norecovery)) {
mutex_lock(&c->sb_lock);
bool write_sb = false;
struct bch_sb_field_ext *ext =
bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
if (!ext) {
ret = -BCH_ERR_ENOSPC_sb;
mutex_unlock(&c->sb_lock);
goto err;
}
if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) {
ext->recovery_passes_required[0] |=
cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
write_sb = true;
}
u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
if (sb_passes) {
struct printbuf buf = PRINTBUF;
prt_str(&buf, "superblock requires following recovery passes to be run:\n ");
prt_bitflags(&buf, bch2_recovery_passes, sb_passes);
bch_info(c, "%s", buf.buf);
printbuf_exit(&buf);
}
if (bch2_check_version_downgrade(c)) {
struct printbuf buf = PRINTBUF;
prt_str(&buf, "Version downgrade required:\n");
__le64 passes = ext->recovery_passes_required[0];
bch2_sb_set_downgrade(c,
BCH_VERSION_MINOR(bcachefs_metadata_version_current),
BCH_VERSION_MINOR(c->sb.version));
passes = ext->recovery_passes_required[0] & ~passes;
if (passes) {
prt_str(&buf, " running recovery passes: ");
prt_bitflags(&buf, bch2_recovery_passes,
bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
}
bch_info(c, "%s", buf.buf);
printbuf_exit(&buf);
write_sb = true;
}
if (check_version_upgrade(c))
write_sb = true;
if (write_sb)
bch2_write_super(c);
c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
mutex_unlock(&c->sb_lock);
}
if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
ret = bch2_blacklist_table_initialize(c);
if (ret) {
bch_err(c, "error initializing blacklist table");
@ -827,11 +915,6 @@ int bch2_fs_recovery(struct bch_fs *c)
if (ret)
goto err;
if (c->opts.fsck &&
(IS_ENABLED(CONFIG_BCACHEFS_DEBUG) ||
BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)))
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
ret = bch2_run_recovery_passes(c);
if (ret)
goto err;
@ -868,14 +951,28 @@ int bch2_fs_recovery(struct bch_fs *c)
}
mutex_lock(&c->sb_lock);
if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != c->sb.version) {
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, c->sb.version);
bool write_sb = false;
if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version));
write_sb = true;
}
if (!test_bit(BCH_FS_ERROR, &c->flags) &&
!(c->disk_sb.sb->compat[0] & cpu_to_le64(1ULL << BCH_COMPAT_alloc_info))) {
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
write_sb = true;
}
if (!test_bit(BCH_FS_ERROR, &c->flags)) {
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
write_sb = true;
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
if (ext &&
(!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) ||
!bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) {
memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required));
memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
write_sb = true;
}
}
if (c->opts.fsck &&
@ -947,7 +1044,7 @@ int bch2_fs_initialize(struct bch_fs *c)
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
bch2_sb_maybe_downgrade(c);
bch2_check_version_downgrade(c);
if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
bch2_sb_upgrade(c, bcachefs_metadata_version_current);


@ -4,6 +4,9 @@
extern const char * const bch2_recovery_passes[];
u64 bch2_recovery_passes_to_stable(u64 v);
u64 bch2_recovery_passes_from_stable(u64 v);
/*
* For when we need to rewind recovery passes and run a pass we skipped:
*/


@ -7,45 +7,57 @@
#define PASS_UNCLEAN BIT(2)
#define PASS_ALWAYS BIT(3)
#define BCH_RECOVERY_PASSES() \
x(alloc_read, PASS_ALWAYS) \
x(stripes_read, PASS_ALWAYS) \
x(initialize_subvolumes, 0) \
x(snapshots_read, PASS_ALWAYS) \
x(check_topology, 0) \
x(check_allocations, PASS_FSCK) \
x(trans_mark_dev_sbs, PASS_ALWAYS|PASS_SILENT) \
x(fs_journal_alloc, PASS_ALWAYS|PASS_SILENT) \
x(set_may_go_rw, PASS_ALWAYS|PASS_SILENT) \
x(journal_replay, PASS_ALWAYS) \
x(check_alloc_info, PASS_FSCK) \
x(check_lrus, PASS_FSCK) \
x(check_btree_backpointers, PASS_FSCK) \
x(check_backpointers_to_extents,PASS_FSCK) \
x(check_extents_to_backpointers,PASS_FSCK) \
x(check_alloc_to_lru_refs, PASS_FSCK) \
x(fs_freespace_init, PASS_ALWAYS|PASS_SILENT) \
x(bucket_gens_init, 0) \
x(check_snapshot_trees, PASS_FSCK) \
x(check_snapshots, PASS_FSCK) \
x(check_subvols, PASS_FSCK) \
x(delete_dead_snapshots, PASS_FSCK) \
x(fs_upgrade_for_subvolumes, 0) \
x(resume_logged_ops, PASS_ALWAYS) \
x(check_inodes, PASS_FSCK) \
x(check_extents, PASS_FSCK) \
x(check_indirect_extents, PASS_FSCK) \
x(check_dirents, PASS_FSCK) \
x(check_xattrs, PASS_FSCK) \
x(check_root, PASS_FSCK) \
x(check_directory_structure, PASS_FSCK) \
x(check_nlinks, PASS_FSCK) \
x(delete_dead_inodes, PASS_FSCK|PASS_UNCLEAN) \
x(fix_reflink_p, 0) \
x(set_fs_needs_rebalance, 0) \
/*
* Passes may be reordered, but the second field is a persistent identifier and
* must never change:
*/
#define BCH_RECOVERY_PASSES() \
x(alloc_read, 0, PASS_ALWAYS) \
x(stripes_read, 1, PASS_ALWAYS) \
x(initialize_subvolumes, 2, 0) \
x(snapshots_read, 3, PASS_ALWAYS) \
x(check_topology, 4, 0) \
x(check_allocations, 5, PASS_FSCK) \
x(trans_mark_dev_sbs, 6, PASS_ALWAYS|PASS_SILENT) \
x(fs_journal_alloc, 7, PASS_ALWAYS|PASS_SILENT) \
x(set_may_go_rw, 8, PASS_ALWAYS|PASS_SILENT) \
x(journal_replay, 9, PASS_ALWAYS) \
x(check_alloc_info, 10, PASS_FSCK) \
x(check_lrus, 11, PASS_FSCK) \
x(check_btree_backpointers, 12, PASS_FSCK) \
x(check_backpointers_to_extents, 13, PASS_FSCK) \
x(check_extents_to_backpointers, 14, PASS_FSCK) \
x(check_alloc_to_lru_refs, 15, PASS_FSCK) \
x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \
x(bucket_gens_init, 17, 0) \
x(check_snapshot_trees, 18, PASS_FSCK) \
x(check_snapshots, 19, PASS_FSCK) \
x(check_subvols, 20, PASS_FSCK) \
x(delete_dead_snapshots, 21, PASS_FSCK) \
x(fs_upgrade_for_subvolumes, 22, 0) \
x(resume_logged_ops, 23, PASS_ALWAYS) \
x(check_inodes, 24, PASS_FSCK) \
x(check_extents, 25, PASS_FSCK) \
x(check_indirect_extents, 26, PASS_FSCK) \
x(check_dirents, 27, PASS_FSCK) \
x(check_xattrs, 28, PASS_FSCK) \
x(check_root, 29, PASS_FSCK) \
x(check_directory_structure, 30, PASS_FSCK) \
x(check_nlinks, 31, PASS_FSCK) \
x(delete_dead_inodes, 32, PASS_FSCK|PASS_UNCLEAN) \
x(fix_reflink_p, 33, 0) \
x(set_fs_needs_rebalance, 34, 0) \
/* We normally enumerate recovery passes in the order we run them: */
enum bch_recovery_pass {
#define x(n, when) BCH_RECOVERY_PASS_##n,
#define x(n, id, when) BCH_RECOVERY_PASS_##n,
BCH_RECOVERY_PASSES()
#undef x
};
/* But we also need stable identifiers that can be used in the superblock */
enum bch_recovery_pass_stable {
#define x(n, id, when) BCH_RECOVERY_PASS_STABLE_##n = id,
BCH_RECOVERY_PASSES()
#undef x
};
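
To see why the extra column matters: the first enum follows list order and may
change when passes are reordered, while the second is what gets encoded in the
superblock and must stay fixed. A small standalone sketch of the same x-macro
trick, with made-up pass names, mirroring the translation done by
bch2_recovery_passes_to_stable():

/* Standalone sketch of run-order vs. stable recovery-pass IDs; invented names. */
#include <stdio.h>
#include <stdint.h>

#define PASSES()                        \
        x(alloc_read,     0)            \
        x(check_foo,      1)            \
        x(journal_replay, 2)

enum pass {                     /* run order: may be rearranged freely */
#define x(n, id)        PASS_##n,
        PASSES()
#undef x
};

enum pass_stable {              /* on-disk IDs: must never change */
#define x(n, id)        PASS_STABLE_##n = id,
        PASSES()
#undef x
};

static uint64_t passes_to_stable(uint64_t v)
{
        static const uint8_t map[] = {
#define x(n, id)        [PASS_##n] = PASS_STABLE_##n,
                PASSES()
#undef x
        };
        uint64_t ret = 0;

        for (unsigned i = 0; i < sizeof(map); i++)
                if (v & (1ULL << i))
                        ret |= 1ULL << map[i];
        return ret;
}

int main(void)
{
        /* Reordering PASSES() would change PASS_check_foo, but the stable
         * bit written to the superblock stays 1ULL << 1. */
        printf("stable mask: %#llx\n",
               (unsigned long long) passes_to_stable(1ULL << PASS_check_foo));
        return 0;
}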


@ -332,8 +332,6 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
mutex_lock(&c->sb_lock);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
bch2_sb_maybe_downgrade(c);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS);
ret = bch2_write_super(c);

fs/bcachefs/sb-downgrade.c (new file, +188)

@ -0,0 +1,188 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Superblock section that contains a list of recovery passes to run when
* downgrading past a given version
*/
#include "bcachefs.h"
#include "darray.h"
#include "recovery.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "super-io.h"
/*
* Downgrade table:
* When downgrading past certain versions, we need to run certain recovery passes
* and fix certain errors:
*
* x(version, recovery_passes, errors...)
*/
#define DOWNGRADE_TABLE()
struct downgrade_entry {
u64 recovery_passes;
u16 version;
u16 nr_errors;
const u16 *errors;
};
#define x(ver, passes, ...) static const u16 ver_##errors[] = { __VA_ARGS__ };
DOWNGRADE_TABLE()
#undef x
static const struct downgrade_entry downgrade_table[] = {
#define x(ver, passes, ...) { \
.recovery_passes = passes, \
.version = bcachefs_metadata_version_##ver,\
.nr_errors = ARRAY_SIZE(ver_##errors), \
.errors = ver_##errors, \
},
DOWNGRADE_TABLE()
#undef x
};
static inline const struct bch_sb_field_downgrade_entry *
downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
{
return (void *) &e->errors[le16_to_cpu(e->nr_errors)];
}
#define for_each_downgrade_entry(_d, _i) \
for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries; \
(void *) _i < vstruct_end(&(_d)->field) && \
(void *) &_i->errors[0] < vstruct_end(&(_d)->field); \
_i = downgrade_entry_next_c(_i))
static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
struct printbuf *err)
{
struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
for_each_downgrade_entry(e, i) {
if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) !=
BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) {
prt_printf(err, "downgrade entry with mismatched major version (%u != %u)",
BCH_VERSION_MAJOR(le16_to_cpu(i->version)),
BCH_VERSION_MAJOR(le16_to_cpu(sb->version)));
return -BCH_ERR_invalid_sb_downgrade;
}
}
return 0;
}
static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
struct bch_sb_field *f)
{
struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
if (out->nr_tabstops <= 1)
printbuf_tabstop_push(out, 16);
for_each_downgrade_entry(e, i) {
prt_str(out, "version:");
prt_tab(out);
bch2_version_to_text(out, le16_to_cpu(i->version));
prt_newline(out);
prt_str(out, "recovery passes:");
prt_tab(out);
prt_bitflags(out, bch2_recovery_passes,
bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0])));
prt_newline(out);
prt_str(out, "errors:");
prt_tab(out);
bool first = true;
for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
if (!first)
prt_char(out, ',');
first = false;
unsigned e = le16_to_cpu(i->errors[j]);
prt_str(out, e < BCH_SB_ERR_MAX ? bch2_sb_error_strs[e] : "(unknown)");
}
prt_newline(out);
}
}
const struct bch_sb_field_ops bch_sb_field_ops_downgrade = {
.validate = bch2_sb_downgrade_validate,
.to_text = bch2_sb_downgrade_to_text,
};
int bch2_sb_downgrade_update(struct bch_fs *c)
{
darray_char table = {};
int ret = 0;
for (const struct downgrade_entry *src = downgrade_table;
src < downgrade_table + ARRAY_SIZE(downgrade_table);
src++) {
if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
continue;
struct bch_sb_field_downgrade_entry *dst;
unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors;
ret = darray_make_room(&table, bytes);
if (ret)
goto out;
dst = (void *) &darray_top(table);
dst->version = cpu_to_le16(src->version);
dst->recovery_passes[0] = cpu_to_le64(src->recovery_passes);
dst->recovery_passes[1] = 0;
dst->nr_errors = cpu_to_le16(src->nr_errors);
for (unsigned i = 0; i < src->nr_errors; i++)
dst->errors[i] = cpu_to_le16(src->errors[i]);
table.nr += bytes;
}
struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64));
if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
goto out;
d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
if (!d) {
ret = -BCH_ERR_ENOSPC_sb_downgrade;
goto out;
}
memcpy(d->entries, table.data, table.nr);
memset_u64s_tail(d->entries, 0, table.nr);
out:
darray_exit(&table);
return ret;
}
void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
{
struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
if (!d)
return;
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
for_each_downgrade_entry(d, i) {
unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version));
if (new_minor < minor && minor <= old_minor) {
ext->recovery_passes_required[0] |= i->recovery_passes[0];
ext->recovery_passes_required[1] |= i->recovery_passes[1];
for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
unsigned e = le16_to_cpu(i->errors[j]);
if (e < BCH_SB_ERR_MAX)
__set_bit(e, c->sb.errors_silent);
if (e < sizeof(ext->errors_silent) * 8)
ext->errors_silent[e / 64] |= cpu_to_le64(BIT_ULL(e % 64));
}
}
}
}
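
DOWNGRADE_TABLE() is intentionally empty in this series; it only gains entries
once a future version (such as the planned disk space accounting rewrite)
actually needs downgrade work. The following is a hypothetical, standalone
example of what a populated entry would look like, following the
x(version, recovery_passes, errors...) format documented at the top of the
file — the version name, pass bit and error IDs below are invented.

/* Hypothetical, standalone demo of a populated downgrade table; not bcachefs code. */
#include <stdio.h>
#include <stdint.h>

#define DOWNGRADE_TABLE()                               \
        x(disk_accounting_v2, 1ULL << 10, 68, 75)

#define x(ver, passes, ...)                             \
        static const uint16_t ver##_errors[] = { __VA_ARGS__ };
DOWNGRADE_TABLE()
#undef x

int main(void)
{
#define x(ver, passes, ...)                                             \
        printf(#ver ": recovery passes %#llx, %zu errors silenced\n",   \
               (unsigned long long) (passes),                           \
               sizeof(ver##_errors) / sizeof(uint16_t));
        DOWNGRADE_TABLE()
#undef x
        return 0;
}

At mount time bch2_sb_downgrade_update() serializes such entries into the new
superblock section, and bch2_sb_set_downgrade() (just above) turns the matching
entries back into recovery_passes_required and errors_silent bits in the ext
field.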


@ -0,0 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_SB_DOWNGRADE_H
#define _BCACHEFS_SB_DOWNGRADE_H
extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade;
int bch2_sb_downgrade_update(struct bch_fs *);
void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned);
#endif /* _BCACHEFS_SB_DOWNGRADE_H */


@ -4,7 +4,7 @@
#include "sb-errors.h"
#include "super-io.h"
static const char * const bch2_sb_error_strs[] = {
const char * const bch2_sb_error_strs[] = {
#define x(t, n, ...) [n] = #t,
BCH_SB_ERRS()
NULL
@ -20,9 +20,7 @@ static void bch2_sb_error_id_to_text(struct printbuf *out, enum bch_sb_error_id
static inline unsigned bch2_sb_field_errors_nr_entries(struct bch_sb_field_errors *e)
{
return e
? (bch2_sb_field_bytes(&e->field) - sizeof(*e)) / sizeof(e->entries[0])
: 0;
return bch2_sb_field_nr_entries(e);
}
static inline unsigned bch2_sb_field_errors_u64s(unsigned nr)


@ -4,258 +4,7 @@
#include "sb-errors_types.h"
#define BCH_SB_ERRS() \
x(clean_but_journal_not_empty, 0) \
x(dirty_but_no_journal_entries, 1) \
x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
x(sb_clean_journal_seq_mismatch, 3) \
x(sb_clean_btree_root_mismatch, 4) \
x(sb_clean_missing, 5) \
x(jset_unsupported_version, 6) \
x(jset_unknown_csum, 7) \
x(jset_last_seq_newer_than_seq, 8) \
x(jset_past_bucket_end, 9) \
x(jset_seq_blacklisted, 10) \
x(journal_entries_missing, 11) \
x(journal_entry_replicas_not_marked, 12) \
x(journal_entry_past_jset_end, 13) \
x(journal_entry_replicas_data_mismatch, 14) \
x(journal_entry_bkey_u64s_0, 15) \
x(journal_entry_bkey_past_end, 16) \
x(journal_entry_bkey_bad_format, 17) \
x(journal_entry_bkey_invalid, 18) \
x(journal_entry_btree_root_bad_size, 19) \
x(journal_entry_blacklist_bad_size, 20) \
x(journal_entry_blacklist_v2_bad_size, 21) \
x(journal_entry_blacklist_v2_start_past_end, 22) \
x(journal_entry_usage_bad_size, 23) \
x(journal_entry_data_usage_bad_size, 24) \
x(journal_entry_clock_bad_size, 25) \
x(journal_entry_clock_bad_rw, 26) \
x(journal_entry_dev_usage_bad_size, 27) \
x(journal_entry_dev_usage_bad_dev, 28) \
x(journal_entry_dev_usage_bad_pad, 29) \
x(btree_node_unreadable, 30) \
x(btree_node_fault_injected, 31) \
x(btree_node_bad_magic, 32) \
x(btree_node_bad_seq, 33) \
x(btree_node_unsupported_version, 34) \
x(btree_node_bset_older_than_sb_min, 35) \
x(btree_node_bset_newer_than_sb, 36) \
x(btree_node_data_missing, 37) \
x(btree_node_bset_after_end, 38) \
x(btree_node_replicas_sectors_written_mismatch, 39) \
x(btree_node_replicas_data_mismatch, 40) \
x(bset_unknown_csum, 41) \
x(bset_bad_csum, 42) \
x(bset_past_end_of_btree_node, 43) \
x(bset_wrong_sector_offset, 44) \
x(bset_empty, 45) \
x(bset_bad_seq, 46) \
x(bset_blacklisted_journal_seq, 47) \
x(first_bset_blacklisted_journal_seq, 48) \
x(btree_node_bad_btree, 49) \
x(btree_node_bad_level, 50) \
x(btree_node_bad_min_key, 51) \
x(btree_node_bad_max_key, 52) \
x(btree_node_bad_format, 53) \
x(btree_node_bkey_past_bset_end, 54) \
x(btree_node_bkey_bad_format, 55) \
x(btree_node_bad_bkey, 56) \
x(btree_node_bkey_out_of_order, 57) \
x(btree_root_bkey_invalid, 58) \
x(btree_root_read_error, 59) \
x(btree_root_bad_min_key, 60) \
x(btree_root_bad_max_key, 61) \
x(btree_node_read_error, 62) \
x(btree_node_topology_bad_min_key, 63) \
x(btree_node_topology_bad_max_key, 64) \
x(btree_node_topology_overwritten_by_prev_node, 65) \
x(btree_node_topology_overwritten_by_next_node, 66) \
x(btree_node_topology_interior_node_empty, 67) \
x(fs_usage_hidden_wrong, 68) \
x(fs_usage_btree_wrong, 69) \
x(fs_usage_data_wrong, 70) \
x(fs_usage_cached_wrong, 71) \
x(fs_usage_reserved_wrong, 72) \
x(fs_usage_persistent_reserved_wrong, 73) \
x(fs_usage_nr_inodes_wrong, 74) \
x(fs_usage_replicas_wrong, 75) \
x(dev_usage_buckets_wrong, 76) \
x(dev_usage_sectors_wrong, 77) \
x(dev_usage_fragmented_wrong, 78) \
x(dev_usage_buckets_ec_wrong, 79) \
x(bkey_version_in_future, 80) \
x(bkey_u64s_too_small, 81) \
x(bkey_invalid_type_for_btree, 82) \
x(bkey_extent_size_zero, 83) \
x(bkey_extent_size_greater_than_offset, 84) \
x(bkey_size_nonzero, 85) \
x(bkey_snapshot_nonzero, 86) \
x(bkey_snapshot_zero, 87) \
x(bkey_at_pos_max, 88) \
x(bkey_before_start_of_btree_node, 89) \
x(bkey_after_end_of_btree_node, 90) \
x(bkey_val_size_nonzero, 91) \
x(bkey_val_size_too_small, 92) \
x(alloc_v1_val_size_bad, 93) \
x(alloc_v2_unpack_error, 94) \
x(alloc_v3_unpack_error, 95) \
x(alloc_v4_val_size_bad, 96) \
x(alloc_v4_backpointers_start_bad, 97) \
x(alloc_key_data_type_bad, 98) \
x(alloc_key_empty_but_have_data, 99) \
x(alloc_key_dirty_sectors_0, 100) \
x(alloc_key_data_type_inconsistency, 101) \
x(alloc_key_to_missing_dev_bucket, 102) \
x(alloc_key_cached_inconsistency, 103) \
x(alloc_key_cached_but_read_time_zero, 104) \
x(alloc_key_to_missing_lru_entry, 105) \
x(alloc_key_data_type_wrong, 106) \
x(alloc_key_gen_wrong, 107) \
x(alloc_key_dirty_sectors_wrong, 108) \
x(alloc_key_cached_sectors_wrong, 109) \
x(alloc_key_stripe_wrong, 110) \
x(alloc_key_stripe_redundancy_wrong, 111) \
x(bucket_sector_count_overflow, 112) \
x(bucket_metadata_type_mismatch, 113) \
x(need_discard_key_wrong, 114) \
x(freespace_key_wrong, 115) \
x(freespace_hole_missing, 116) \
x(bucket_gens_val_size_bad, 117) \
x(bucket_gens_key_wrong, 118) \
x(bucket_gens_hole_wrong, 119) \
x(bucket_gens_to_invalid_dev, 120) \
x(bucket_gens_to_invalid_buckets, 121) \
x(bucket_gens_nonzero_for_invalid_buckets, 122) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
x(need_discard_freespace_key_bad, 124) \
x(backpointer_pos_wrong, 125) \
x(backpointer_to_missing_device, 126) \
x(backpointer_to_missing_alloc, 127) \
x(backpointer_to_missing_ptr, 128) \
x(lru_entry_at_time_0, 129) \
x(lru_entry_to_invalid_bucket, 130) \
x(lru_entry_bad, 131) \
x(btree_ptr_val_too_big, 132) \
x(btree_ptr_v2_val_too_big, 133) \
x(btree_ptr_has_non_ptr, 134) \
x(extent_ptrs_invalid_entry, 135) \
x(extent_ptrs_no_ptrs, 136) \
x(extent_ptrs_too_many_ptrs, 137) \
x(extent_ptrs_redundant_crc, 138) \
x(extent_ptrs_redundant_stripe, 139) \
x(extent_ptrs_unwritten, 140) \
x(extent_ptrs_written_and_unwritten, 141) \
x(ptr_to_invalid_device, 142) \
x(ptr_to_duplicate_device, 143) \
x(ptr_after_last_bucket, 144) \
x(ptr_before_first_bucket, 145) \
x(ptr_spans_multiple_buckets, 146) \
x(ptr_to_missing_backpointer, 147) \
x(ptr_to_missing_alloc_key, 148) \
x(ptr_to_missing_replicas_entry, 149) \
x(ptr_to_missing_stripe, 150) \
x(ptr_to_incorrect_stripe, 151) \
x(ptr_gen_newer_than_bucket_gen, 152) \
x(ptr_too_stale, 153) \
x(stale_dirty_ptr, 154) \
x(ptr_bucket_data_type_mismatch, 155) \
x(ptr_cached_and_erasure_coded, 156) \
x(ptr_crc_uncompressed_size_too_small, 157) \
x(ptr_crc_csum_type_unknown, 158) \
x(ptr_crc_compression_type_unknown, 159) \
x(ptr_crc_redundant, 160) \
x(ptr_crc_uncompressed_size_too_big, 161) \
x(ptr_crc_nonce_mismatch, 162) \
x(ptr_stripe_redundant, 163) \
x(reservation_key_nr_replicas_invalid, 164) \
x(reflink_v_refcount_wrong, 165) \
x(reflink_p_to_missing_reflink_v, 166) \
x(stripe_pos_bad, 167) \
x(stripe_val_size_bad, 168) \
x(stripe_sector_count_wrong, 169) \
x(snapshot_tree_pos_bad, 170) \
x(snapshot_tree_to_missing_snapshot, 171) \
x(snapshot_tree_to_missing_subvol, 172) \
x(snapshot_tree_to_wrong_subvol, 173) \
x(snapshot_tree_to_snapshot_subvol, 174) \
x(snapshot_pos_bad, 175) \
x(snapshot_parent_bad, 176) \
x(snapshot_children_not_normalized, 177) \
x(snapshot_child_duplicate, 178) \
x(snapshot_child_bad, 179) \
x(snapshot_skiplist_not_normalized, 180) \
x(snapshot_skiplist_bad, 181) \
x(snapshot_should_not_have_subvol, 182) \
x(snapshot_to_bad_snapshot_tree, 183) \
x(snapshot_bad_depth, 184) \
x(snapshot_bad_skiplist, 185) \
x(subvol_pos_bad, 186) \
x(subvol_not_master_and_not_snapshot, 187) \
x(subvol_to_missing_root, 188) \
x(subvol_root_wrong_bi_subvol, 189) \
x(bkey_in_missing_snapshot, 190) \
x(inode_pos_inode_nonzero, 191) \
x(inode_pos_blockdev_range, 192) \
x(inode_unpack_error, 193) \
x(inode_str_hash_invalid, 194) \
x(inode_v3_fields_start_bad, 195) \
x(inode_snapshot_mismatch, 196) \
x(inode_unlinked_but_clean, 197) \
x(inode_unlinked_but_nlink_nonzero, 198) \
x(inode_checksum_type_invalid, 199) \
x(inode_compression_type_invalid, 200) \
x(inode_subvol_root_but_not_dir, 201) \
x(inode_i_size_dirty_but_clean, 202) \
x(inode_i_sectors_dirty_but_clean, 203) \
x(inode_i_sectors_wrong, 204) \
x(inode_dir_wrong_nlink, 205) \
x(inode_dir_multiple_links, 206) \
x(inode_multiple_links_but_nlink_0, 207) \
x(inode_wrong_backpointer, 208) \
x(inode_wrong_nlink, 209) \
x(inode_unreachable, 210) \
x(deleted_inode_but_clean, 211) \
x(deleted_inode_missing, 212) \
x(deleted_inode_is_dir, 213) \
x(deleted_inode_not_unlinked, 214) \
x(extent_overlapping, 215) \
x(extent_in_missing_inode, 216) \
x(extent_in_non_reg_inode, 217) \
x(extent_past_end_of_inode, 218) \
x(dirent_empty_name, 219) \
x(dirent_val_too_big, 220) \
x(dirent_name_too_long, 221) \
x(dirent_name_embedded_nul, 222) \
x(dirent_name_dot_or_dotdot, 223) \
x(dirent_name_has_slash, 224) \
x(dirent_d_type_wrong, 225) \
x(dirent_d_parent_subvol_wrong, 226) \
x(dirent_in_missing_dir_inode, 227) \
x(dirent_in_non_dir_inode, 228) \
x(dirent_to_missing_inode, 229) \
x(dirent_to_missing_subvol, 230) \
x(dirent_to_itself, 231) \
x(quota_type_invalid, 232) \
x(xattr_val_size_too_small, 233) \
x(xattr_val_size_too_big, 234) \
x(xattr_invalid_type, 235) \
x(xattr_name_invalid_chars, 236) \
x(xattr_in_missing_inode, 237) \
x(root_subvol_missing, 238) \
x(root_dir_missing, 239) \
x(root_inode_not_dir, 240) \
x(dir_loop, 241) \
x(hash_table_key_duplicate, 242) \
x(hash_table_key_wrong_offset, 243)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
BCH_SB_ERRS()
#undef x
BCH_SB_ERR_MAX
};
extern const char * const bch2_sb_error_strs[];
extern const struct bch_sb_field_ops bch_sb_field_ops_errors;


@ -4,6 +4,259 @@
#include "darray.h"
#define BCH_SB_ERRS() \
x(clean_but_journal_not_empty, 0) \
x(dirty_but_no_journal_entries, 1) \
x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
x(sb_clean_journal_seq_mismatch, 3) \
x(sb_clean_btree_root_mismatch, 4) \
x(sb_clean_missing, 5) \
x(jset_unsupported_version, 6) \
x(jset_unknown_csum, 7) \
x(jset_last_seq_newer_than_seq, 8) \
x(jset_past_bucket_end, 9) \
x(jset_seq_blacklisted, 10) \
x(journal_entries_missing, 11) \
x(journal_entry_replicas_not_marked, 12) \
x(journal_entry_past_jset_end, 13) \
x(journal_entry_replicas_data_mismatch, 14) \
x(journal_entry_bkey_u64s_0, 15) \
x(journal_entry_bkey_past_end, 16) \
x(journal_entry_bkey_bad_format, 17) \
x(journal_entry_bkey_invalid, 18) \
x(journal_entry_btree_root_bad_size, 19) \
x(journal_entry_blacklist_bad_size, 20) \
x(journal_entry_blacklist_v2_bad_size, 21) \
x(journal_entry_blacklist_v2_start_past_end, 22) \
x(journal_entry_usage_bad_size, 23) \
x(journal_entry_data_usage_bad_size, 24) \
x(journal_entry_clock_bad_size, 25) \
x(journal_entry_clock_bad_rw, 26) \
x(journal_entry_dev_usage_bad_size, 27) \
x(journal_entry_dev_usage_bad_dev, 28) \
x(journal_entry_dev_usage_bad_pad, 29) \
x(btree_node_unreadable, 30) \
x(btree_node_fault_injected, 31) \
x(btree_node_bad_magic, 32) \
x(btree_node_bad_seq, 33) \
x(btree_node_unsupported_version, 34) \
x(btree_node_bset_older_than_sb_min, 35) \
x(btree_node_bset_newer_than_sb, 36) \
x(btree_node_data_missing, 37) \
x(btree_node_bset_after_end, 38) \
x(btree_node_replicas_sectors_written_mismatch, 39) \
x(btree_node_replicas_data_mismatch, 40) \
x(bset_unknown_csum, 41) \
x(bset_bad_csum, 42) \
x(bset_past_end_of_btree_node, 43) \
x(bset_wrong_sector_offset, 44) \
x(bset_empty, 45) \
x(bset_bad_seq, 46) \
x(bset_blacklisted_journal_seq, 47) \
x(first_bset_blacklisted_journal_seq, 48) \
x(btree_node_bad_btree, 49) \
x(btree_node_bad_level, 50) \
x(btree_node_bad_min_key, 51) \
x(btree_node_bad_max_key, 52) \
x(btree_node_bad_format, 53) \
x(btree_node_bkey_past_bset_end, 54) \
x(btree_node_bkey_bad_format, 55) \
x(btree_node_bad_bkey, 56) \
x(btree_node_bkey_out_of_order, 57) \
x(btree_root_bkey_invalid, 58) \
x(btree_root_read_error, 59) \
x(btree_root_bad_min_key, 60) \
x(btree_root_bad_max_key, 61) \
x(btree_node_read_error, 62) \
x(btree_node_topology_bad_min_key, 63) \
x(btree_node_topology_bad_max_key, 64) \
x(btree_node_topology_overwritten_by_prev_node, 65) \
x(btree_node_topology_overwritten_by_next_node, 66) \
x(btree_node_topology_interior_node_empty, 67) \
x(fs_usage_hidden_wrong, 68) \
x(fs_usage_btree_wrong, 69) \
x(fs_usage_data_wrong, 70) \
x(fs_usage_cached_wrong, 71) \
x(fs_usage_reserved_wrong, 72) \
x(fs_usage_persistent_reserved_wrong, 73) \
x(fs_usage_nr_inodes_wrong, 74) \
x(fs_usage_replicas_wrong, 75) \
x(dev_usage_buckets_wrong, 76) \
x(dev_usage_sectors_wrong, 77) \
x(dev_usage_fragmented_wrong, 78) \
x(dev_usage_buckets_ec_wrong, 79) \
x(bkey_version_in_future, 80) \
x(bkey_u64s_too_small, 81) \
x(bkey_invalid_type_for_btree, 82) \
x(bkey_extent_size_zero, 83) \
x(bkey_extent_size_greater_than_offset, 84) \
x(bkey_size_nonzero, 85) \
x(bkey_snapshot_nonzero, 86) \
x(bkey_snapshot_zero, 87) \
x(bkey_at_pos_max, 88) \
x(bkey_before_start_of_btree_node, 89) \
x(bkey_after_end_of_btree_node, 90) \
x(bkey_val_size_nonzero, 91) \
x(bkey_val_size_too_small, 92) \
x(alloc_v1_val_size_bad, 93) \
x(alloc_v2_unpack_error, 94) \
x(alloc_v3_unpack_error, 95) \
x(alloc_v4_val_size_bad, 96) \
x(alloc_v4_backpointers_start_bad, 97) \
x(alloc_key_data_type_bad, 98) \
x(alloc_key_empty_but_have_data, 99) \
x(alloc_key_dirty_sectors_0, 100) \
x(alloc_key_data_type_inconsistency, 101) \
x(alloc_key_to_missing_dev_bucket, 102) \
x(alloc_key_cached_inconsistency, 103) \
x(alloc_key_cached_but_read_time_zero, 104) \
x(alloc_key_to_missing_lru_entry, 105) \
x(alloc_key_data_type_wrong, 106) \
x(alloc_key_gen_wrong, 107) \
x(alloc_key_dirty_sectors_wrong, 108) \
x(alloc_key_cached_sectors_wrong, 109) \
x(alloc_key_stripe_wrong, 110) \
x(alloc_key_stripe_redundancy_wrong, 111) \
x(bucket_sector_count_overflow, 112) \
x(bucket_metadata_type_mismatch, 113) \
x(need_discard_key_wrong, 114) \
x(freespace_key_wrong, 115) \
x(freespace_hole_missing, 116) \
x(bucket_gens_val_size_bad, 117) \
x(bucket_gens_key_wrong, 118) \
x(bucket_gens_hole_wrong, 119) \
x(bucket_gens_to_invalid_dev, 120) \
x(bucket_gens_to_invalid_buckets, 121) \
x(bucket_gens_nonzero_for_invalid_buckets, 122) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
x(need_discard_freespace_key_bad, 124) \
x(backpointer_pos_wrong, 125) \
x(backpointer_to_missing_device, 126) \
x(backpointer_to_missing_alloc, 127) \
x(backpointer_to_missing_ptr, 128) \
x(lru_entry_at_time_0, 129) \
x(lru_entry_to_invalid_bucket, 130) \
x(lru_entry_bad, 131) \
x(btree_ptr_val_too_big, 132) \
x(btree_ptr_v2_val_too_big, 133) \
x(btree_ptr_has_non_ptr, 134) \
x(extent_ptrs_invalid_entry, 135) \
x(extent_ptrs_no_ptrs, 136) \
x(extent_ptrs_too_many_ptrs, 137) \
x(extent_ptrs_redundant_crc, 138) \
x(extent_ptrs_redundant_stripe, 139) \
x(extent_ptrs_unwritten, 140) \
x(extent_ptrs_written_and_unwritten, 141) \
x(ptr_to_invalid_device, 142) \
x(ptr_to_duplicate_device, 143) \
x(ptr_after_last_bucket, 144) \
x(ptr_before_first_bucket, 145) \
x(ptr_spans_multiple_buckets, 146) \
x(ptr_to_missing_backpointer, 147) \
x(ptr_to_missing_alloc_key, 148) \
x(ptr_to_missing_replicas_entry, 149) \
x(ptr_to_missing_stripe, 150) \
x(ptr_to_incorrect_stripe, 151) \
x(ptr_gen_newer_than_bucket_gen, 152) \
x(ptr_too_stale, 153) \
x(stale_dirty_ptr, 154) \
x(ptr_bucket_data_type_mismatch, 155) \
x(ptr_cached_and_erasure_coded, 156) \
x(ptr_crc_uncompressed_size_too_small, 157) \
x(ptr_crc_csum_type_unknown, 158) \
x(ptr_crc_compression_type_unknown, 159) \
x(ptr_crc_redundant, 160) \
x(ptr_crc_uncompressed_size_too_big, 161) \
x(ptr_crc_nonce_mismatch, 162) \
x(ptr_stripe_redundant, 163) \
x(reservation_key_nr_replicas_invalid, 164) \
x(reflink_v_refcount_wrong, 165) \
x(reflink_p_to_missing_reflink_v, 166) \
x(stripe_pos_bad, 167) \
x(stripe_val_size_bad, 168) \
x(stripe_sector_count_wrong, 169) \
x(snapshot_tree_pos_bad, 170) \
x(snapshot_tree_to_missing_snapshot, 171) \
x(snapshot_tree_to_missing_subvol, 172) \
x(snapshot_tree_to_wrong_subvol, 173) \
x(snapshot_tree_to_snapshot_subvol, 174) \
x(snapshot_pos_bad, 175) \
x(snapshot_parent_bad, 176) \
x(snapshot_children_not_normalized, 177) \
x(snapshot_child_duplicate, 178) \
x(snapshot_child_bad, 179) \
x(snapshot_skiplist_not_normalized, 180) \
x(snapshot_skiplist_bad, 181) \
x(snapshot_should_not_have_subvol, 182) \
x(snapshot_to_bad_snapshot_tree, 183) \
x(snapshot_bad_depth, 184) \
x(snapshot_bad_skiplist, 185) \
x(subvol_pos_bad, 186) \
x(subvol_not_master_and_not_snapshot, 187) \
x(subvol_to_missing_root, 188) \
x(subvol_root_wrong_bi_subvol, 189) \
x(bkey_in_missing_snapshot, 190) \
x(inode_pos_inode_nonzero, 191) \
x(inode_pos_blockdev_range, 192) \
x(inode_unpack_error, 193) \
x(inode_str_hash_invalid, 194) \
x(inode_v3_fields_start_bad, 195) \
x(inode_snapshot_mismatch, 196) \
x(inode_unlinked_but_clean, 197) \
x(inode_unlinked_but_nlink_nonzero, 198) \
x(inode_checksum_type_invalid, 199) \
x(inode_compression_type_invalid, 200) \
x(inode_subvol_root_but_not_dir, 201) \
x(inode_i_size_dirty_but_clean, 202) \
x(inode_i_sectors_dirty_but_clean, 203) \
x(inode_i_sectors_wrong, 204) \
x(inode_dir_wrong_nlink, 205) \
x(inode_dir_multiple_links, 206) \
x(inode_multiple_links_but_nlink_0, 207) \
x(inode_wrong_backpointer, 208) \
x(inode_wrong_nlink, 209) \
x(inode_unreachable, 210) \
x(deleted_inode_but_clean, 211) \
x(deleted_inode_missing, 212) \
x(deleted_inode_is_dir, 213) \
x(deleted_inode_not_unlinked, 214) \
x(extent_overlapping, 215) \
x(extent_in_missing_inode, 216) \
x(extent_in_non_reg_inode, 217) \
x(extent_past_end_of_inode, 218) \
x(dirent_empty_name, 219) \
x(dirent_val_too_big, 220) \
x(dirent_name_too_long, 221) \
x(dirent_name_embedded_nul, 222) \
x(dirent_name_dot_or_dotdot, 223) \
x(dirent_name_has_slash, 224) \
x(dirent_d_type_wrong, 225) \
x(dirent_d_parent_subvol_wrong, 226) \
x(dirent_in_missing_dir_inode, 227) \
x(dirent_in_non_dir_inode, 228) \
x(dirent_to_missing_inode, 229) \
x(dirent_to_missing_subvol, 230) \
x(dirent_to_itself, 231) \
x(quota_type_invalid, 232) \
x(xattr_val_size_too_small, 233) \
x(xattr_val_size_too_big, 234) \
x(xattr_invalid_type, 235) \
x(xattr_name_invalid_chars, 236) \
x(xattr_in_missing_inode, 237) \
x(root_subvol_missing, 238) \
x(root_dir_missing, 239) \
x(root_inode_not_dir, 240) \
x(dir_loop, 241) \
x(hash_table_key_duplicate, 242) \
x(hash_table_key_wrong_offset, 243)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
BCH_SB_ERRS()
#undef x
BCH_SB_ERR_MAX
};
struct bch_sb_error_entry_cpu {
u64 id:16,
nr:48;


@ -146,6 +146,24 @@ int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s);
}
int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol)
{
struct bch_subvolume s;
int ret = bch2_subvolume_get_inlined(trans, subvol, true, 0, &s);
if (ret)
return ret;
if (BCH_SUBVOLUME_RO(&s))
return -EROFS;
return 0;
}
int bch2_subvol_is_ro(struct bch_fs *c, u32 subvol)
{
return bch2_trans_do(c, NULL, NULL, 0,
bch2_subvol_is_ro_trans(trans, subvol));
}
int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
struct bch_subvolume *subvol)
{


@ -23,6 +23,9 @@ int bch2_subvolume_get(struct btree_trans *, unsigned,
bool, int, struct bch_subvolume *);
int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *);
int bch2_subvol_is_ro_trans(struct btree_trans *, u32);
int bch2_subvol_is_ro(struct bch_fs *, u32);
int bch2_delete_dead_snapshots(struct bch_fs *);
void bch2_delete_dead_snapshots_async(struct bch_fs *);


@ -13,6 +13,7 @@
#include "replicas.h"
#include "quota.h"
#include "sb-clean.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "sb-members.h"
#include "super-io.h"
@ -264,6 +265,17 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
return f;
}
struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *sb,
enum bch_sb_field_type type,
unsigned u64s)
{
struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);
if (!f || le32_to_cpu(f->u64s) < u64s)
f = bch2_sb_field_resize_id(sb, type, u64s);
return f;
}
/* Superblock validate: */
static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out)
@ -484,6 +496,21 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, struct printbuf *out,
/* device open: */
static unsigned long le_ulong_to_cpu(unsigned long v)
{
return sizeof(unsigned long) == 8
? le64_to_cpu(v)
: le32_to_cpu(v);
}
static void le_bitvector_to_cpu(unsigned long *dst, unsigned long *src, unsigned nr)
{
BUG_ON(nr & (BITS_PER_TYPE(long) - 1));
for (unsigned i = 0; i < BITS_TO_LONGS(nr); i++)
dst[i] = le_ulong_to_cpu(src[i]);
}
static void bch2_sb_update(struct bch_fs *c)
{
struct bch_sb *src = c->disk_sb.sb;
@ -512,8 +539,15 @@ static void bch2_sb_update(struct bch_fs *c)
c->sb.features = le64_to_cpu(src->features[0]);
c->sb.compat = le64_to_cpu(src->compat[0]);
memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
if (ext)
le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
sizeof(c->sb.errors_silent) * 8);
for_each_member_device(ca, c, i) {
struct bch_member m = bch2_sb_member_get(src, i);
struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
ca->mi = bch2_mi_to_cpu(&m);
}
}
@ -906,6 +940,7 @@ int bch2_write_super(struct bch_fs *c)
bch2_sb_members_from_cpu(c);
bch2_sb_members_cpy_v2_v1(&c->disk_sb);
bch2_sb_errors_from_cpu(c);
bch2_sb_downgrade_update(c);
for_each_online_member(ca, c, i)
bch2_sb_from_fs(c, ca);
@ -1029,8 +1064,10 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
}
/* Downgrade if superblock is at a higher version than currently supported: */
void bch2_sb_maybe_downgrade(struct bch_fs *c)
bool bch2_check_version_downgrade(struct bch_fs *c)
{
bool ret = bcachefs_metadata_version_current < c->sb.version;
lockdep_assert_held(&c->sb_lock);
/*
@ -1044,16 +1081,61 @@ void bch2_sb_maybe_downgrade(struct bch_fs *c)
if (c->sb.version_min > bcachefs_metadata_version_current)
c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
return ret;
}
void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
{
lockdep_assert_held(&c->sb_lock);
if (BCH_VERSION_MAJOR(new_version) >
BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
bch2_sb_field_resize(&c->disk_sb, downgrade, 0);
c->disk_sb.sb->version = cpu_to_le16(new_version);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
}
static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
struct printbuf *err)
{
if (vstruct_bytes(f) < 88) {
prt_printf(err, "field too small (%zu < %u)", vstruct_bytes(f), 88);
return -BCH_ERR_invalid_sb_ext;
}
return 0;
}
static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
struct bch_sb_field *f)
{
struct bch_sb_field_ext *e = field_to_type(f, ext);
prt_printf(out, "Recovery passes required:");
prt_tab(out);
prt_bitflags(out, bch2_recovery_passes,
bch2_recovery_passes_from_stable(le64_to_cpu(e->recovery_passes_required[0])));
prt_newline(out);
unsigned long *errors_silent = kmalloc(sizeof(e->errors_silent), GFP_KERNEL);
if (errors_silent) {
le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8);
prt_printf(out, "Errors to silently fix:");
prt_tab(out);
prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent, sizeof(e->errors_silent) * 8);
prt_newline(out);
kfree(errors_silent);
}
}
static const struct bch_sb_field_ops bch_sb_field_ops_ext = {
.validate = bch2_sb_ext_validate,
.to_text = bch2_sb_ext_to_text,
};
static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
#define x(f, nr) \
[BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f,


@ -40,6 +40,16 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *,
#define bch2_sb_field_resize(_sb, _name, _u64s) \
field_to_type(bch2_sb_field_resize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name)
struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *,
enum bch_sb_field_type, unsigned);
#define bch2_sb_field_get_minsize(_sb, _name, _u64s) \
field_to_type(bch2_sb_field_get_minsize_id(_sb, BCH_SB_FIELD_##_name, _u64s), _name)
#define bch2_sb_field_nr_entries(_f) \
(_f ? ((bch2_sb_field_bytes(&_f->field) - sizeof(*_f)) / \
sizeof(_f->entries[0])) \
: 0)
void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type);
extern const char * const bch2_sb_fields[];
@ -83,7 +93,7 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
__bch2_check_set_feature(c, feat);
}
void bch2_sb_maybe_downgrade(struct bch_fs *);
bool bch2_check_version_downgrade(struct bch_fs *);
void bch2_sb_upgrade(struct bch_fs *, unsigned);
void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,


@ -243,6 +243,7 @@ do { \
#define prt_units_s64(...) bch2_prt_units_s64(__VA_ARGS__)
#define prt_string_option(...) bch2_prt_string_option(__VA_ARGS__)
#define prt_bitflags(...) bch2_prt_bitflags(__VA_ARGS__)
#define prt_bitflags_vector(...) bch2_prt_bitflags_vector(__VA_ARGS__)
void bch2_pr_time_units(struct printbuf *, u64);
void bch2_prt_datetime(struct printbuf *, time64_t);


@ -176,7 +176,8 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
struct btree_iter inode_iter = { NULL };
int ret;
ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
ret = bch2_subvol_is_ro_trans(trans, inum.subvol) ?:
bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_INTENT);
if (ret)
return ret;