bcachefs: bkey noops

For upcoming inline data extents, we're going to need to be able to
shorten the value of existing bkeys in the btree - and to make that work
we're going to need to be able to pad out the space the value previously
took up with something.

This patch changes the various code paths that iterate over bkeys to
handle k->u64s == 0 as meaning "skip the next 8 bytes".
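
As a rough sketch of the mechanism (this helper is hypothetical, not part
of the patch - the code that actually creates noops comes with the inline
data extent work), shortening a packed key's value in place just means
trimming u64s and zeroing the freed tail:

	/*
	 * Hypothetical sketch: shrink the value of a packed key in place.
	 * Each zeroed u64 left behind reads as a key with u64s == 0 - a
	 * "noop" that bkey_next_skip_noops() and the node iterators step
	 * over 8 bytes at a time.
	 */
	static void bkey_shorten_val(struct bkey_packed *k, u8 new_u64s)
	{
		u8 old_u64s = k->u64s;

		BUG_ON(new_u64s > old_u64s);

		k->u64s = new_u64s;
		memset((u64 *) k + new_u64s, 0,
		       (old_u64s - new_u64s) * sizeof(u64));
	}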

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
commit ad44bdc351
parent aef90ce085
Author: Kent Overstreet, 2019-11-09 23:50:52 -05:00 (committed by Kent Overstreet)
7 changed files with 83 additions and 69 deletions

View file

@@ -41,6 +41,16 @@ struct bkey_s {
#define bkey_next(_k) vstruct_next(_k)
static inline struct bkey_packed *bkey_next_skip_noops(struct bkey_packed *k,
struct bkey_packed *end)
{
k = bkey_next(k);
while (k != end && !k->u64s)
k = (void *) ((u64 *) k + 1);
return k;
}
#define bkey_val_u64s(_k) ((_k)->u64s - BKEY_U64s)
static inline size_t bkey_val_bytes(const struct bkey *k)

View file

@@ -75,6 +75,10 @@ static void sort_key_next(struct btree_node_iter_large *iter,
{
i->k += __btree_node_offset_to_key(b, i->k)->u64s;
while (i->k != i->end &&
!__btree_node_offset_to_key(b, i->k)->u64s)
i->k++;
if (i->k == i->end)
*i = iter->data[--iter->used];
}
@@ -119,7 +123,7 @@ static inline struct bkey_packed *sort_iter_peek(struct sort_iter *iter)
static inline void sort_iter_advance(struct sort_iter *iter, sort_cmp_fn cmp)
{
iter->data->k = bkey_next(iter->data->k);
iter->data->k = bkey_next_skip_noops(iter->data->k, iter->data->end);
BUG_ON(iter->data->k > iter->data->end);

View file

@@ -64,7 +64,7 @@ void bch2_dump_bset(struct btree *b, struct bset *i, unsigned set)
for (_k = i->start, k = bkey_unpack_key(b, _k);
_k < vstruct_last(i);
_k = _n, k = n) {
_n = bkey_next(_k);
_n = bkey_next_skip_noops(_k, vstruct_last(i));
bch2_bkey_to_text(&PBUF(buf), &k);
printk(KERN_ERR "block %u key %5u: %s\n", set,
@@ -132,9 +132,7 @@ void __bch2_verify_btree_nr_keys(struct btree *b)
struct btree_nr_keys nr = { 0 };
for_each_bset(b, t)
for (k = btree_bkey_first(b, t);
k != btree_bkey_last(b, t);
k = bkey_next(k))
bset_tree_for_each_key(b, t, k)
if (!bkey_whiteout(k))
btree_keys_account_key_add(&nr, t - b->set, k);
@@ -595,7 +593,7 @@ static void bch2_bset_verify_rw_aux_tree(struct btree *b,
rw_aux_tree(b, t)[j - 1].offset);
}
k = bkey_next(k);
k = bkey_next_skip_noops(k, btree_bkey_last(b, t));
BUG_ON(k >= btree_bkey_last(b, t));
}
}
@@ -786,9 +784,7 @@ static void __build_rw_aux_tree(struct btree *b, struct bset_tree *t)
rw_aux_tree(b, t)[0].offset =
__btree_node_key_to_offset(b, btree_bkey_first(b, t));
for (k = btree_bkey_first(b, t);
k != btree_bkey_last(b, t);
k = bkey_next(k)) {
bset_tree_for_each_key(b, t, k) {
if (t->size == bset_rw_tree_capacity(b, t))
break;
@@ -821,7 +817,7 @@ static void __build_ro_aux_tree(struct btree *b, struct bset_tree *t)
/* First we figure out where the first key in each cacheline is */
eytzinger1_for_each(j, t->size) {
while (bkey_to_cacheline(b, t, k) < cacheline)
prev = k, k = bkey_next(k);
prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t));
if (k >= btree_bkey_last(b, t)) {
/* XXX: this path sucks */
@@ -837,10 +833,10 @@ static void __build_ro_aux_tree(struct btree *b, struct bset_tree *t)
EBUG_ON(tree_to_bkey(b, t, j) != k);
}
while (bkey_next(k) != btree_bkey_last(b, t))
k = bkey_next(k);
while (k != btree_bkey_last(b, t))
prev = k, k = bkey_next_skip_noops(k, btree_bkey_last(b, t));
t->max_key = bkey_unpack_pos(b, k);
t->max_key = bkey_unpack_pos(b, prev);
/* Then we build the tree */
eytzinger1_for_each(j, t->size)
@@ -966,7 +962,7 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
struct bkey_packed *p, *i, *ret = NULL, *orig_k = k;
while ((p = __bkey_prev(b, t, k)) && !ret) {
for (i = p; i != k; i = bkey_next(i))
for (i = p; i != k; i = bkey_next_skip_noops(i, k))
if (i->type >= min_key_type)
ret = i;
@@ -976,9 +972,11 @@ struct bkey_packed *bch2_bkey_prev_filter(struct btree *b,
if (btree_keys_expensive_checks(b)) {
BUG_ON(ret >= orig_k);
for (i = ret ? bkey_next(ret) : btree_bkey_first(b, t);
for (i = ret
? bkey_next_skip_noops(ret, orig_k)
: btree_bkey_first(b, t);
i != orig_k;
i = bkey_next(i))
i = bkey_next_skip_noops(i, orig_k))
BUG_ON(i->type >= min_key_type);
}
@@ -1013,7 +1011,7 @@ static void ro_aux_tree_fix_invalidated_key(struct btree *b,
/* signal to make_bfloat() that they're uninitialized: */
min_key.u64s = max_key.u64s = 0;
if (bkey_next(k) == btree_bkey_last(b, t)) {
if (bkey_next_skip_noops(k, btree_bkey_last(b, t)) == btree_bkey_last(b, t)) {
t->max_key = bkey_unpack_pos(b, k);
for (j = 1; j < t->size; j = j * 2 + 1)
@@ -1137,7 +1135,7 @@ static void bch2_bset_fix_lookup_table(struct btree *b,
struct bkey_packed *k = start;
while (1) {
k = bkey_next(k);
k = bkey_next_skip_noops(k, end);
if (k == end)
break;
@@ -1386,12 +1384,12 @@ struct bkey_packed *bch2_bset_search_linear(struct btree *b,
while (m != btree_bkey_last(b, t) &&
bkey_iter_cmp_p_or_unp(b, search, lossy_packed_search,
m) > 0)
m = bkey_next(m);
m = bkey_next_skip_noops(m, btree_bkey_last(b, t));
if (!packed_search)
while (m != btree_bkey_last(b, t) &&
bkey_iter_pos_cmp(b, search, m) > 0)
m = bkey_next(m);
m = bkey_next_skip_noops(m, btree_bkey_last(b, t));
if (btree_keys_expensive_checks(b)) {
struct bkey_packed *prev = bch2_bkey_prev_all(b, t, m);
@@ -1625,6 +1623,10 @@ static inline void __bch2_btree_node_iter_advance(struct btree_node_iter *iter,
EBUG_ON(iter->data->k > iter->data->end);
while (!__btree_node_iter_set_end(iter, 0) &&
!__bch2_btree_node_iter_peek_all(iter, b)->u64s)
iter->data->k++;
if (unlikely(__btree_node_iter_set_end(iter, 0))) {
bch2_btree_node_iter_set_drop(iter, iter->data);
return;

View file

@@ -284,9 +284,14 @@ static inline struct bkey_s __bkey_disassemble(struct btree *b,
return (struct bkey_s) { .k = u, .v = bkeyp_val(&b->format, k), };
}
#define for_each_bset(_b, _t) \
#define for_each_bset(_b, _t) \
for (_t = (_b)->set; _t < (_b)->set + (_b)->nsets; _t++)
#define bset_tree_for_each_key(_b, _t, _k) \
for (_k = btree_bkey_first(_b, _t); \
_k != btree_bkey_last(_b, _t); \
_k = bkey_next_skip_noops(_k, btree_bkey_last(_b, _t)))
static inline bool bset_has_ro_aux_tree(struct bset_tree *t)
{
return bset_aux_tree_type(t) == BSET_RO_AUX_TREE;

View file

@@ -924,7 +924,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
k < vstruct_last(s2) &&
vstruct_blocks_plus(n1->data, c->block_bits,
u64s + k->u64s) <= blocks;
k = bkey_next(k)) {
k = bkey_next_skip_noops(k, vstruct_last(s2))) {
last = k;
u64s += k->u64s;
}

View file

@@ -25,34 +25,33 @@ static void verify_no_dups(struct btree *b,
struct bkey_packed *end)
{
#ifdef CONFIG_BCACHEFS_DEBUG
struct bkey_packed *k;
struct bkey_packed *k, *p;
for (k = start; k != end && bkey_next(k) != end; k = bkey_next(k)) {
struct bkey l = bkey_unpack_key(b, k);
struct bkey r = bkey_unpack_key(b, bkey_next(k));
if (start == end)
return;
for (p = start, k = bkey_next_skip_noops(start, end);
k != end;
p = k, k = bkey_next_skip_noops(k, end)) {
struct bkey l = bkey_unpack_key(b, p);
struct bkey r = bkey_unpack_key(b, k);
BUG_ON(btree_node_is_extents(b)
? bkey_cmp(l.p, bkey_start_pos(&r)) > 0
: bkey_cmp(l.p, bkey_start_pos(&r)) >= 0);
//BUG_ON(bkey_cmp_packed(&b->format, k, bkey_next(k)) >= 0);
//BUG_ON(bkey_cmp_packed(&b->format, p, k) >= 0);
}
#endif
}
static void clear_needs_whiteout(struct bset *i)
static void set_needs_whiteout(struct bset *i, int v)
{
struct bkey_packed *k;
for (k = i->start; k != vstruct_last(i); k = bkey_next(k))
k->needs_whiteout = false;
}
static void set_needs_whiteout(struct bset *i)
{
struct bkey_packed *k;
for (k = i->start; k != vstruct_last(i); k = bkey_next(k))
k->needs_whiteout = true;
for (k = i->start;
k != vstruct_last(i);
k = bkey_next_skip_noops(k, vstruct_last(i)))
k->needs_whiteout = v;
}
static void btree_bounce_free(struct bch_fs *c, unsigned order,
@@ -167,7 +166,7 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
out = i->start;
for (k = start; k != end; k = n) {
n = bkey_next(k);
n = bkey_next_skip_noops(k, end);
if (bkey_deleted(k) && btree_node_is_extents(b))
continue;
@@ -260,7 +259,7 @@ static bool bch2_drop_whiteouts(struct btree *b)
out = i->start;
for (k = start; k != end; k = n) {
n = bkey_next(k);
n = bkey_next_skip_noops(k, end);
if (!bkey_whiteout(k)) {
bkey_copy(out, k);
@@ -679,14 +678,6 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
struct bkey tmp;
const char *invalid;
if (btree_err_on(!k->u64s,
BTREE_ERR_FIXABLE, c, b, i,
"KEY_U64s 0: %zu bytes of metadata lost",
vstruct_end(i) - (void *) k)) {
i->u64s = cpu_to_le16((u64 *) k - i->_data);
break;
}
if (btree_err_on(bkey_next(k) > vstruct_last(i),
BTREE_ERR_FIXABLE, c, b, i,
"key extends past end of bset")) {
@@ -755,7 +746,7 @@ static int validate_bset(struct bch_fs *c, struct btree *b,
prev_pos = u.k->p;
prev = k;
k = bkey_next(k);
k = bkey_next_skip_noops(k, vstruct_last(i));
}
SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
@@ -914,12 +905,12 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
continue;
}
k = bkey_next(k);
k = bkey_next_skip_noops(k, vstruct_last(i));
}
bch2_bset_build_aux_tree(b, b->set, false);
set_needs_whiteout(btree_bset_first(b));
set_needs_whiteout(btree_bset_first(b), true);
btree_node_reset_sib_u64s(b);
out:
@@ -1424,7 +1415,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
: bch2_sort_keys(i->start, &sort_iter, false);
le16_add_cpu(&i->u64s, u64s);
clear_needs_whiteout(i);
set_needs_whiteout(i, false);
/* do we have data to write? */
if (b->written && !i->u64s)
@@ -1579,7 +1570,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
}
for_each_bset(b, t)
set_needs_whiteout(bset(b, t));
set_needs_whiteout(bset(b, t), true);
bch2_btree_verify(c, b);

View file

@@ -79,9 +79,7 @@ void __bch2_btree_calc_format(struct bkey_format_state *s, struct btree *b)
bch2_bkey_format_add_pos(s, b->data->min_key);
for_each_bset(b, t)
for (k = btree_bkey_first(b, t);
k != btree_bkey_last(b, t);
k = bkey_next(k))
bset_tree_for_each_key(b, t, k)
if (!bkey_whiteout(k)) {
uk = bkey_unpack_key(b, k);
bch2_bkey_format_add_key(s, &uk);
@@ -1240,7 +1238,9 @@ static struct btree *__btree_split_node(struct btree_update *as,
*/
k = set1->start;
while (1) {
if (bkey_next(k) == vstruct_last(set1))
struct bkey_packed *n = bkey_next_skip_noops(k, vstruct_last(set1));
if (n == vstruct_last(set1))
break;
if (k->_data - set1->_data >= (le16_to_cpu(set1->u64s) * 3) / 5)
break;
@@ -1251,7 +1251,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
nr_unpacked++;
prev = k;
k = bkey_next(k);
k = n;
}
BUG_ON(!prev);
@@ -1315,7 +1315,7 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
{
struct btree_node_iter node_iter;
struct bkey_i *k = bch2_keylist_front(keys);
struct bkey_packed *p;
struct bkey_packed *src, *dst, *n;
struct bset *i;
BUG_ON(btree_node_type(b) != BKEY_TYPE_BTREE);
@@ -1340,16 +1340,18 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
* for the pivot:
*/
i = btree_bset_first(b);
p = i->start;
while (p != vstruct_last(i))
if (bkey_deleted(p)) {
le16_add_cpu(&i->u64s, -p->u64s);
set_btree_bset_end(b, b->set);
memmove_u64s_down(p, bkey_next(p),
(u64 *) vstruct_last(i) -
(u64 *) p);
} else
p = bkey_next(p);
src = dst = i->start;
while (src != vstruct_last(i)) {
n = bkey_next_skip_noops(src, vstruct_last(i));
if (!bkey_deleted(src)) {
memmove_u64s_down(dst, src, src->u64s);
dst = bkey_next(dst);
}
src = n;
}
i->u64s = cpu_to_le16((u64 *) dst - i->_data);
set_btree_bset_end(b, b->set);
BUG_ON(b->nsets != 1 ||
b->nr.live_u64s != le16_to_cpu(btree_bset_first(b)->u64s));