diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 035da548737b..8eed82ac41f1 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -558,7 +558,6 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c)
 	b->sib_u64s[0] = 0;
 	b->sib_u64s[1] = 0;
 	b->whiteout_u64s = 0;
-	b->uncompacted_whiteout_u64s = 0;
 	bch2_btree_keys_init(b, &c->expensive_debug_checks);
 
 	bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc],
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 8532087f2754..9acf59c0710d 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -79,6 +79,81 @@ static void *btree_bounce_alloc(struct bch_fs *c, unsigned order,
 	return mempool_alloc(&c->btree_bounce_pool, GFP_NOIO);
 }
 
+static void sort_bkey_ptrs(const struct btree *bt,
+			   struct bkey_packed **ptrs, unsigned nr)
+{
+	unsigned n = nr, a = nr / 2, b, c, d;
+
+	if (!a)
+		return;
+
+	/* Heap sort: see lib/sort.c: */
+	while (1) {
+		if (a)
+			a--;
+		else if (--n)
+			swap(ptrs[0], ptrs[n]);
+		else
+			break;
+
+		for (b = a; c = 2 * b + 1, (d = c + 1) < n;)
+			b = bkey_cmp_packed(bt,
+					    ptrs[c],
+					    ptrs[d]) >= 0 ? c : d;
+		if (d == n)
+			b = c;
+
+		while (b != a &&
+		       bkey_cmp_packed(bt,
+				       ptrs[a],
+				       ptrs[b]) >= 0)
+			b = (b - 1) / 2;
+		c = b;
+		while (b != a) {
+			b = (b - 1) / 2;
+			swap(ptrs[b], ptrs[c]);
+		}
+	}
+}
+
+static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b)
+{
+	struct bkey_packed *new_whiteouts, **whiteout_ptrs, *k;
+	bool used_mempool1 = false, used_mempool2 = false;
+	unsigned order, i, nr = 0;
+
+	if (!b->whiteout_u64s)
+		return;
+
+	order = get_order(b->whiteout_u64s * sizeof(u64));
+
+	new_whiteouts = btree_bounce_alloc(c, order, &used_mempool1);
+	whiteout_ptrs = btree_bounce_alloc(c, order, &used_mempool2);
+
+	for (k = unwritten_whiteouts_start(c, b);
+	     k != unwritten_whiteouts_end(c, b);
+	     k = bkey_next(k))
+		whiteout_ptrs[nr++] = k;
+
+	sort_bkey_ptrs(b, whiteout_ptrs, nr);
+
+	k = new_whiteouts;
+
+	for (i = 0; i < nr; i++) {
+		bkey_copy(k, whiteout_ptrs[i]);
+		k = bkey_next(k);
+	}
+
+	verify_no_dups(b, new_whiteouts,
+		       (void *) ((u64 *) new_whiteouts + b->whiteout_u64s));
+
+	memcpy_u64s(unwritten_whiteouts_start(c, b),
+		    new_whiteouts, b->whiteout_u64s);
+
+	btree_bounce_free(c, order, used_mempool2, whiteout_ptrs);
+	btree_bounce_free(c, order, used_mempool1, new_whiteouts);
+}
+
 static unsigned should_compact_bset(struct btree *b, struct bset_tree *t,
 				    bool compacting,
 				    enum compact_mode mode)
@@ -116,6 +191,8 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
 	if (!whiteout_u64s)
 		return false;
 
+	bch2_sort_whiteouts(c, b);
+
 	sort_iter_init(&sort_iter, b);
 
 	whiteout_u64s += b->whiteout_u64s;
@@ -171,11 +248,14 @@ bool __bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
 			if (bkey_deleted(k) && btree_node_is_extents(b))
 				continue;
 
+			BUG_ON(bkey_whiteout(k) &&
+			       k->needs_whiteout &&
+			       bkey_written(b, k));
+
 			if (bkey_whiteout(k) && !k->needs_whiteout)
 				continue;
 
 			if (bkey_whiteout(k)) {
-				unreserve_whiteout(b, k);
 				memcpy_u64s(u_pos, k, bkeyp_key_u64s(f, k));
 				set_bkeyp_val_u64s(f, u_pos, 0);
 				u_pos = bkey_next(u_pos);
@@ -1342,21 +1422,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 	BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c));
 	BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format)));
 
-	/*
-	 * We can't block on six_lock_write() here; another thread might be
-	 * trying to get a journal reservation with read locks held, and getting
-	 * a journal reservation might be blocked on flushing the journal and
-	 * doing btree writes:
-	 */
-	if (lock_type_held == SIX_LOCK_intent &&
-	    six_trylock_write(&b->c.lock)) {
-		__bch2_compact_whiteouts(c, b, COMPACT_WRITTEN);
-		six_unlock_write(&b->c.lock);
-	} else {
-		__bch2_compact_whiteouts(c, b, COMPACT_WRITTEN_NO_WRITE_LOCK);
-	}
-
-	BUG_ON(b->uncompacted_whiteout_u64s);
+	bch2_sort_whiteouts(c, b);
 
 	sort_iter_init(&sort_iter, b);
 
@@ -1545,7 +1611,6 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
 		return false;
 
 	BUG_ON(b->whiteout_u64s);
-	BUG_ON(b->uncompacted_whiteout_u64s);
 
 	clear_btree_node_just_written(b);
 
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 3a26a8802e86..e370474fd8c2 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -97,7 +97,6 @@ struct btree {
 	struct btree_nr_keys nr;
 	u16 sib_u64s[2];
 	u16 whiteout_u64s;
-	u16 uncompacted_whiteout_u64s;
 	u8 page_order;
 	u8 unpack_fn_len;
 
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 85f1320fa7b1..8f9d4a0b68ea 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -251,8 +251,7 @@ static inline ssize_t __bch_btree_u64s_remaining(struct bch_fs *c,
 						 void *end)
 {
 	ssize_t used = bset_byte_offset(b, end) / sizeof(u64) +
-		b->whiteout_u64s +
-		b->uncompacted_whiteout_u64s;
+		b->whiteout_u64s;
 	ssize_t total = c->opts.btree_node_size << 6;
 
 	return total - used;
@@ -302,23 +301,19 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
 	return NULL;
 }
 
-static inline void unreserve_whiteout(struct btree *b, struct bkey_packed *k)
+static inline void push_whiteout(struct bch_fs *c, struct btree *b,
+				 struct bkey_packed *k)
 {
-	if (bkey_written(b, k)) {
-		EBUG_ON(b->uncompacted_whiteout_u64s <
-			bkeyp_key_u64s(&b->format, k));
-		b->uncompacted_whiteout_u64s -=
-			bkeyp_key_u64s(&b->format, k);
-	}
-}
+	unsigned u64s = bkeyp_key_u64s(&b->format, k);
+	struct bkey_packed *dst;
 
-static inline void reserve_whiteout(struct btree *b, struct bkey_packed *k)
-{
-	if (bkey_written(b, k)) {
-		BUG_ON(!k->needs_whiteout);
-		b->uncompacted_whiteout_u64s +=
-			bkeyp_key_u64s(&b->format, k);
-	}
+	BUG_ON(u64s > bch_btree_keys_u64s_remaining(c, b));
+
+	b->whiteout_u64s += bkeyp_key_u64s(&b->format, k);
+	dst = unwritten_whiteouts_start(c, b);
+	memcpy_u64s(dst, k, u64s);
+	dst->u64s = u64s;
+	dst->type = KEY_TYPE_deleted;
 }
 
 /*
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 6e5405f0b372..d13f1fc75bdf 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -104,38 +104,43 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
 			return true;
 		}
 
-		insert->k.needs_whiteout = k->needs_whiteout;
-
 		btree_account_key_drop(b, k);
 
+		if (bkey_whiteout(&insert->k)) {
+			unsigned clobber_u64s = k->u64s, new_u64s = k->u64s;
+
+			k->type = KEY_TYPE_deleted;
+
+			if (k->needs_whiteout) {
+				push_whiteout(iter->trans->c, b, k);
+				k->needs_whiteout = false;
+			}
+
+			if (k >= btree_bset_last(b)->start) {
+				bch2_bset_delete(b, k, clobber_u64s);
+				new_u64s = 0;
+			}
+
+			bch2_btree_node_iter_fix(iter, b, node_iter, k,
+						 clobber_u64s, new_u64s);
+			return true;
+
+		}
+
 		if (k >= btree_bset_last(b)->start) {
 			clobber_u64s = k->u64s;
-
-			/*
-			 * If we're deleting, and the key we're deleting doesn't
-			 * need a whiteout (it wasn't overwriting a key that had
-			 * been written to disk) - just delete it:
-			 */
-			if (bkey_whiteout(&insert->k) && !k->needs_whiteout) {
-				bch2_bset_delete(b, k, clobber_u64s);
-				bch2_btree_node_iter_fix(iter, b, node_iter,
-							 k, clobber_u64s, 0);
-				return true;
-			}
-
 			goto overwrite;
 		}
 
+		insert->k.needs_whiteout = k->needs_whiteout;
+		k->needs_whiteout = false;
 		k->type = KEY_TYPE_deleted;
+		/*
+		 * XXX: we should be able to do this without two calls to
+		 * bch2_btree_node_iter_fix:
+		 */
 		bch2_btree_node_iter_fix(iter, b, node_iter, k,
 					 k->u64s, k->u64s);
-
-		if (bkey_whiteout(&insert->k)) {
-			reserve_whiteout(b, k);
-			return true;
-		} else {
-			k->needs_whiteout = false;
-		}
 	} else {
 		/*
 		 * Deleting, but the key to delete wasn't found - nothing to do:
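
Note on the sorting approach (illustrative only, not part of the patch): sort_bkey_ptrs() in the btree_io.c hunk above heapsorts an array of pointers to the packed keys, so only pointer-sized elements are swapped while the variable-length keys stay put; bch2_sort_whiteouts() then copies the whiteouts out in sorted order. The sketch below shows the same pointer-sorting idea on plain ints, using a conventional sift-down heapsort rather than the bottom-up lib/sort.c variant the patch uses; the names sift_down and sort_int_ptrs are made up for this example.

/*
 * Illustrative sketch: sort an array of pointers by the values they
 * point to, analogous in spirit to sort_bkey_ptrs() above.
 */
#include <stdio.h>

static void sift_down(int **p, unsigned root, unsigned n)
{
	while (2 * root + 1 < n) {
		unsigned child = 2 * root + 1;

		/* Pick the larger of the two children, if both exist: */
		if (child + 1 < n && *p[child + 1] > *p[child])
			child++;

		if (*p[root] >= *p[child])
			break;

		int *tmp = p[root];
		p[root] = p[child];
		p[child] = tmp;
		root = child;
	}
}

static void sort_int_ptrs(int **p, unsigned n)
{
	/* Build a max-heap, then repeatedly move the max to the end: */
	for (unsigned i = n / 2; i-- > 0;)
		sift_down(p, i, n);

	for (unsigned i = n; i-- > 1;) {
		int *tmp = p[0];
		p[0] = p[i];
		p[i] = tmp;
		sift_down(p, 0, i);
	}
}

int main(void)
{
	int vals[] = { 42, 7, 19, 3, 25 };
	int *ptrs[] = { &vals[0], &vals[1], &vals[2], &vals[3], &vals[4] };

	sort_int_ptrs(ptrs, 5);

	for (unsigned i = 0; i < 5; i++)
		printf("%d ", *ptrs[i]);	/* prints: 3 7 19 25 42 */
	printf("\n");
	return 0;
}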