mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-13 14:14:37 +00:00
bcachefs: btree_ptr_v2
Add a new btree ptr type which contains the sequence number (random 64 bit cookie, actually) for that btree node - this lets us verify that when we read in a btree node it really is the btree node we wanted. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
237e80483a
commit
548b3d209f
11 changed files with 117 additions and 26 deletions
|
@ -343,7 +343,8 @@ static inline void bkey_init(struct bkey *k)
|
|||
x(stripe, 14) \
|
||||
x(reflink_p, 15) \
|
||||
x(reflink_v, 16) \
|
||||
x(inline_data, 17)
|
||||
x(inline_data, 17) \
|
||||
x(btree_ptr_v2, 18)
|
||||
|
||||
enum bch_bkey_type {
|
||||
#define x(name, nr) KEY_TYPE_##name = nr,
|
||||
|
@ -599,6 +600,19 @@ struct bch_btree_ptr {
|
|||
struct bch_extent_ptr start[];
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
struct bch_btree_ptr_v2 {
|
||||
struct bch_val v;
|
||||
|
||||
__u64 mem_ptr;
|
||||
__le64 seq;
|
||||
__le16 sectors_written;
|
||||
/* In case we ever decide to do variable size btree nodes: */
|
||||
__le16 sectors;
|
||||
struct bpos min_key;
|
||||
__u64 _data[0];
|
||||
struct bch_extent_ptr start[];
|
||||
} __attribute__((packed, aligned(8)));
|
||||
|
||||
struct bch_extent {
|
||||
struct bch_val v;
|
||||
|
||||
|
@ -630,7 +644,8 @@ struct bch_reservation {
|
|||
|
||||
/* Btree pointers don't carry around checksums: */
|
||||
#define BKEY_BTREE_PTR_VAL_U64s_MAX \
|
||||
((sizeof(struct bch_extent_ptr)) / sizeof(u64) * BCH_REPLICAS_MAX)
|
||||
((sizeof(struct bch_btree_ptr_v2) + \
|
||||
sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(u64))
|
||||
#define BKEY_BTREE_PTR_U64s_MAX \
|
||||
(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
|
||||
|
||||
|
@ -1299,7 +1314,8 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16);
|
|||
x(new_siphash, 7) \
|
||||
x(inline_data, 8) \
|
||||
x(new_extent_overwrite, 9) \
|
||||
x(incompressible, 10)
|
||||
x(incompressible, 10) \
|
||||
x(btree_ptr_v2, 11)
|
||||
|
||||
enum bch_sb_feature {
|
||||
#define x(f, n) BCH_FEATURE_##f,
|
||||
|
|
|
@ -573,6 +573,7 @@ BKEY_VAL_ACCESSORS(stripe);
|
|||
BKEY_VAL_ACCESSORS(reflink_p);
|
||||
BKEY_VAL_ACCESSORS(reflink_v);
|
||||
BKEY_VAL_ACCESSORS(inline_data);
|
||||
BKEY_VAL_ACCESSORS(btree_ptr_v2);
|
||||
|
||||
/* byte order helpers */
|
||||
|
||||
|
|
|
@ -40,6 +40,8 @@ static inline u64 btree_ptr_hash_val(const struct bkey_i *k)
|
|||
switch (k->k.type) {
|
||||
case KEY_TYPE_btree_ptr:
|
||||
return *((u64 *) bkey_i_to_btree_ptr_c(k)->v.start);
|
||||
case KEY_TYPE_btree_ptr_v2:
|
||||
return bkey_i_to_btree_ptr_v2_c(k)->v.seq;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -734,6 +734,15 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
|
|||
bch2_bpos_swab(&b->data->max_key);
|
||||
}
|
||||
|
||||
if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
|
||||
struct bch_btree_ptr_v2 *bp =
|
||||
&bkey_i_to_btree_ptr_v2(&b->key)->v;
|
||||
|
||||
btree_err_on(bkey_cmp(b->data->min_key, bp->min_key),
|
||||
BTREE_ERR_MUST_RETRY, c, b, NULL,
|
||||
"incorrect min_key");
|
||||
}
|
||||
|
||||
btree_err_on(bkey_cmp(b->data->max_key, b->key.k.p),
|
||||
BTREE_ERR_MUST_RETRY, c, b, i,
|
||||
"incorrect max key");
|
||||
|
@ -897,6 +906,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
|||
BTREE_ERR_MUST_RETRY, c, b, NULL,
|
||||
"bad btree header");
|
||||
|
||||
if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
|
||||
struct bch_btree_ptr_v2 *bp =
|
||||
&bkey_i_to_btree_ptr_v2(&b->key)->v;
|
||||
|
||||
btree_err_on(b->data->keys.seq != bp->seq,
|
||||
BTREE_ERR_MUST_RETRY, c, b, NULL,
|
||||
"got wrong btree node");
|
||||
}
|
||||
|
||||
while (b->written < c->opts.btree_node_size) {
|
||||
unsigned sectors, whiteout_u64s = 0;
|
||||
struct nonce nonce;
|
||||
|
@ -1004,15 +1022,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
|||
i = &b->data->keys;
|
||||
for (k = i->start; k != vstruct_last(i);) {
|
||||
struct bkey tmp;
|
||||
struct bkey_s_c u = bkey_disassemble(b, k, &tmp);
|
||||
const char *invalid = bch2_bkey_val_invalid(c, u);
|
||||
struct bkey_s u = __bkey_disassemble(b, k, &tmp);
|
||||
const char *invalid = bch2_bkey_val_invalid(c, u.s_c);
|
||||
|
||||
if (invalid ||
|
||||
(inject_invalid_keys(c) &&
|
||||
!bversion_cmp(u.k->version, MAX_VERSION))) {
|
||||
char buf[160];
|
||||
|
||||
bch2_bkey_val_to_text(&PBUF(buf), c, u);
|
||||
bch2_bkey_val_to_text(&PBUF(buf), c, u.s_c);
|
||||
btree_err(BTREE_ERR_FIXABLE, c, b, i,
|
||||
"invalid bkey %s: %s", buf, invalid);
|
||||
|
||||
|
@ -1025,6 +1043,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
|
|||
continue;
|
||||
}
|
||||
|
||||
if (u.k->type == KEY_TYPE_btree_ptr_v2) {
|
||||
struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u);
|
||||
|
||||
bp.v->mem_ptr = 0;
|
||||
}
|
||||
|
||||
k = bkey_next_skip_noops(k, vstruct_last(i));
|
||||
}
|
||||
|
||||
|
|
|
@ -332,7 +332,11 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
|
|||
goto retry;
|
||||
}
|
||||
|
||||
bkey_btree_ptr_init(&tmp.k);
|
||||
if (c->sb.features & (1ULL << BCH_FEATURE_btree_ptr_v2))
|
||||
bkey_btree_ptr_v2_init(&tmp.k);
|
||||
else
|
||||
bkey_btree_ptr_init(&tmp.k);
|
||||
|
||||
bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size);
|
||||
|
||||
bch2_open_bucket_get(c, wp, &ob);
|
||||
|
@ -354,14 +358,13 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
|
|||
{
|
||||
struct bch_fs *c = as->c;
|
||||
struct btree *b;
|
||||
int ret;
|
||||
|
||||
BUG_ON(level >= BTREE_MAX_DEPTH);
|
||||
BUG_ON(!as->reserve->nr);
|
||||
|
||||
b = as->reserve->b[--as->reserve->nr];
|
||||
|
||||
BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id));
|
||||
|
||||
set_btree_node_accessed(b);
|
||||
set_btree_node_dirty(b);
|
||||
set_btree_node_need_write(b);
|
||||
|
@ -372,7 +375,16 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
|
|||
b->data->flags = 0;
|
||||
SET_BTREE_NODE_ID(b->data, as->btree_id);
|
||||
SET_BTREE_NODE_LEVEL(b->data, level);
|
||||
b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0];
|
||||
b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr;
|
||||
|
||||
if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
|
||||
struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);
|
||||
|
||||
bp->v.mem_ptr = 0;
|
||||
bp->v.seq = b->data->keys.seq;
|
||||
bp->v.sectors_written = 0;
|
||||
bp->v.sectors = cpu_to_le16(c->opts.btree_node_size);
|
||||
}
|
||||
|
||||
if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))
|
||||
SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);
|
||||
|
@ -385,10 +397,26 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
|
|||
|
||||
btree_node_will_make_reachable(as, b);
|
||||
|
||||
ret = bch2_btree_node_hash_insert(&c->btree_cache, b, level, as->btree_id);
|
||||
BUG_ON(ret);
|
||||
|
||||
trace_btree_node_alloc(c, b);
|
||||
return b;
|
||||
}
|
||||
|
||||
static void btree_set_min(struct btree *b, struct bpos pos)
|
||||
{
|
||||
if (b->key.k.type == KEY_TYPE_btree_ptr_v2)
|
||||
bkey_i_to_btree_ptr_v2(&b->key)->v.min_key = pos;
|
||||
b->data->min_key = pos;
|
||||
}
|
||||
|
||||
static void btree_set_max(struct btree *b, struct bpos pos)
|
||||
{
|
||||
b->key.k.p = pos;
|
||||
b->data->max_key = pos;
|
||||
}
|
||||
|
||||
struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
|
||||
struct btree *b,
|
||||
struct bkey_format format)
|
||||
|
@ -397,11 +425,12 @@ struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *as,
|
|||
|
||||
n = bch2_btree_node_alloc(as, b->c.level);
|
||||
|
||||
n->data->min_key = b->data->min_key;
|
||||
n->data->max_key = b->data->max_key;
|
||||
n->data->format = format;
|
||||
SET_BTREE_NODE_SEQ(n->data, BTREE_NODE_SEQ(b->data) + 1);
|
||||
|
||||
btree_set_min(n, b->data->min_key);
|
||||
btree_set_max(n, b->data->max_key);
|
||||
|
||||
n->data->format = format;
|
||||
btree_node_set_format(n, format);
|
||||
|
||||
bch2_btree_sort_into(as->c, n, b);
|
||||
|
@ -431,10 +460,9 @@ static struct btree *__btree_root_alloc(struct btree_update *as, unsigned level)
|
|||
{
|
||||
struct btree *b = bch2_btree_node_alloc(as, level);
|
||||
|
||||
b->data->min_key = POS_MIN;
|
||||
b->data->max_key = POS_MAX;
|
||||
btree_set_min(b, POS_MIN);
|
||||
btree_set_max(b, POS_MAX);
|
||||
b->data->format = bch2_btree_calc_format(b);
|
||||
b->key.k.p = POS_MAX;
|
||||
|
||||
btree_node_set_format(b, b->data->format);
|
||||
bch2_btree_build_aux_trees(b);
|
||||
|
@ -1263,10 +1291,8 @@ static struct btree *__btree_split_node(struct btree_update *as,
|
|||
|
||||
BUG_ON(!prev);
|
||||
|
||||
n1->key.k.p = bkey_unpack_pos(n1, prev);
|
||||
n1->data->max_key = n1->key.k.p;
|
||||
n2->data->min_key =
|
||||
btree_type_successor(n1->c.btree_id, n1->key.k.p);
|
||||
btree_set_max(n1, bkey_unpack_pos(n1, prev));
|
||||
btree_set_min(n2, btree_type_successor(n1->c.btree_id, n1->key.k.p));
|
||||
|
||||
set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k);
|
||||
set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s));
|
||||
|
@ -1749,10 +1775,9 @@ void __bch2_foreground_maybe_merge(struct bch_fs *c,
|
|||
|
||||
n = bch2_btree_node_alloc(as, b->c.level);
|
||||
|
||||
n->data->min_key = prev->data->min_key;
|
||||
n->data->max_key = next->data->max_key;
|
||||
btree_set_min(n, prev->data->min_key);
|
||||
btree_set_max(n, next->data->max_key);
|
||||
n->data->format = new_f;
|
||||
n->key.k.p = next->key.k.p;
|
||||
|
||||
btree_node_set_format(n, new_f);
|
||||
|
||||
|
@ -2202,8 +2227,8 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
|
|||
bch2_btree_build_aux_trees(b);
|
||||
|
||||
b->data->flags = 0;
|
||||
b->data->min_key = POS_MIN;
|
||||
b->data->max_key = POS_MAX;
|
||||
btree_set_min(b, POS_MIN);
|
||||
btree_set_max(b, POS_MAX);
|
||||
b->data->format = bch2_btree_calc_format(b);
|
||||
btree_node_set_format(b, b->data->format);
|
||||
|
||||
|
|
|
@ -1194,6 +1194,7 @@ int bch2_mark_key_locked(struct bch_fs *c,
|
|||
ret = bch2_mark_alloc(c, k, fs_usage, journal_seq, flags);
|
||||
break;
|
||||
case KEY_TYPE_btree_ptr:
|
||||
case KEY_TYPE_btree_ptr_v2:
|
||||
sectors = !(flags & BTREE_TRIGGER_OVERWRITE)
|
||||
? c->opts.btree_node_size
|
||||
: -c->opts.btree_node_size;
|
||||
|
@ -1729,6 +1730,7 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
|
|||
|
||||
switch (k.k->type) {
|
||||
case KEY_TYPE_btree_ptr:
|
||||
case KEY_TYPE_btree_ptr_v2:
|
||||
sectors = !(flags & BTREE_TRIGGER_OVERWRITE)
|
||||
? c->opts.btree_node_size
|
||||
: -c->opts.btree_node_size;
|
||||
|
|
|
@ -97,7 +97,8 @@ static inline struct bucket *PTR_BUCKET(struct bch_dev *ca,
|
|||
static inline enum bch_data_type ptr_data_type(const struct bkey *k,
|
||||
const struct bch_extent_ptr *ptr)
|
||||
{
|
||||
if (k->type == KEY_TYPE_btree_ptr)
|
||||
if (k->type == KEY_TYPE_btree_ptr ||
|
||||
k->type == KEY_TYPE_btree_ptr_v2)
|
||||
return BCH_DATA_BTREE;
|
||||
|
||||
return ptr->cached ? BCH_DATA_CACHED : BCH_DATA_USER;
|
||||
|
|
|
@ -748,6 +748,7 @@ void bch2_bkey_append_ptr(struct bkey_i *k,
|
|||
|
||||
switch (k->k.type) {
|
||||
case KEY_TYPE_btree_ptr:
|
||||
case KEY_TYPE_btree_ptr_v2:
|
||||
case KEY_TYPE_extent:
|
||||
EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX);
|
||||
|
||||
|
@ -1030,6 +1031,8 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k)
|
|||
|
||||
if (k.k->type == KEY_TYPE_btree_ptr)
|
||||
size_ondisk = c->opts.btree_node_size;
|
||||
if (k.k->type == KEY_TYPE_btree_ptr_v2)
|
||||
size_ondisk = le16_to_cpu(bkey_s_c_to_btree_ptr_v2(k).v->sectors);
|
||||
|
||||
bkey_extent_entry_for_each(ptrs, entry) {
|
||||
if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
|
||||
|
|
|
@ -225,6 +225,13 @@ static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
|
|||
bkey_val_end(r),
|
||||
};
|
||||
}
|
||||
case KEY_TYPE_btree_ptr_v2: {
|
||||
struct bkey_s_c_btree_ptr_v2 e = bkey_s_c_to_btree_ptr_v2(k);
|
||||
return (struct bkey_ptrs_c) {
|
||||
to_entry(&e.v->start[0]),
|
||||
to_entry(extent_entry_last(e))
|
||||
};
|
||||
}
|
||||
default:
|
||||
return (struct bkey_ptrs_c) { NULL, NULL };
|
||||
}
|
||||
|
@ -372,6 +379,13 @@ void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
|
|||
.swab = bch2_ptr_swab, \
|
||||
}
|
||||
|
||||
#define bch2_bkey_ops_btree_ptr_v2 (struct bkey_ops) { \
|
||||
.key_invalid = bch2_btree_ptr_invalid, \
|
||||
.key_debugcheck = bch2_btree_ptr_debugcheck, \
|
||||
.val_to_text = bch2_btree_ptr_to_text, \
|
||||
.swab = bch2_ptr_swab, \
|
||||
}
|
||||
|
||||
/* KEY_TYPE_extent: */
|
||||
|
||||
const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
|
||||
|
@ -416,6 +430,7 @@ static inline bool bkey_extent_is_direct_data(const struct bkey *k)
|
|||
{
|
||||
switch (k->type) {
|
||||
case KEY_TYPE_btree_ptr:
|
||||
case KEY_TYPE_btree_ptr_v2:
|
||||
case KEY_TYPE_extent:
|
||||
case KEY_TYPE_reflink_v:
|
||||
return true;
|
||||
|
|
|
@ -1010,6 +1010,7 @@ int bch2_fs_recovery(struct bch_fs *c)
|
|||
c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
|
||||
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_siphash;
|
||||
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
|
||||
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_ptr_v2;
|
||||
write_sb = true;
|
||||
}
|
||||
|
||||
|
|
|
@ -112,6 +112,7 @@ void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
|
|||
|
||||
switch (k.k->type) {
|
||||
case KEY_TYPE_btree_ptr:
|
||||
case KEY_TYPE_btree_ptr_v2:
|
||||
e->data_type = BCH_DATA_BTREE;
|
||||
extent_to_replicas(k, e);
|
||||
break;
|
||||
|
|
Loading…
Reference in a new issue