linux-stable/fs/bcachefs/btree_update_leaf.c
Kent Overstreet 3636ed489a bcachefs: Deferred btree updates
This will be used in the future for inode updates, which will be very
helpful for multithreaded workloads that have to update the inode with
every extent update (appends, or updates that change i_sectors).

It will also eventually be used for fully persistent alloc info.

However, we still need a mechanism for reserving space in the journal
prior to getting a journal reservation, so it's not technically safe to
make use of this just yet; we could deadlock with the journal full
(although that's not likely to be an issue in practice).

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2023-10-22 17:08:12 -04:00
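As a rough sketch of the intended use (hypothetical - the example function, its
arguments and the choice of the inodes btree are illustrative, not part of this
patch), an inode update could be journalled through a long-lived
deferred_update instead of a btree iterator, using only the entry fields
(.k, .d, .deferred) and helpers added below:

static int example_inode_update(struct bch_fs *c,
				struct deferred_update *d,
				struct bkey_i *inode_k)
{
	/*
	 * Journals inode_k and stashes it in d; the btree itself is only
	 * updated later, by deferred_update_flush():
	 */
	return __bch2_btree_insert_at(&(struct btree_insert) {
		.c		= c,
		.nr		= 1,
		.flags		= BTREE_INSERT_NOFAIL,
		.entries	= &(struct btree_insert_entry) {
			.k		= inode_k,
			.d		= d,
			.deferred	= true,
		},
	});
}

The deferred_update itself would come from bch2_deferred_update_alloc() and be
released with bch2_deferred_update_free(), which flushes any pending key to the
btree first; per the caveat above, this can't be relied on yet because journal
space for the eventual flush isn't reserved up front.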


// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_io.h"
#include "btree_iter.h"
#include "btree_locking.h"
#include "buckets.h"
#include "debug.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "journal_reclaim.h"
#include "keylist.h"
#include "replicas.h"
#include "trace.h"
#include <linux/sort.h>
/* Inserting into a given leaf node (last stage of insert): */
/* Handle overwrites and do insert, for non extents: */
bool bch2_btree_bset_insert_key(struct btree_iter *iter,
struct btree *b,
struct btree_node_iter *node_iter,
struct bkey_i *insert)
{
const struct bkey_format *f = &b->format;
struct bkey_packed *k;
unsigned clobber_u64s;
EBUG_ON(btree_node_just_written(b));
EBUG_ON(bset_written(b, btree_bset_last(b)));
EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
EBUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0 ||
bkey_cmp(insert->k.p, b->data->max_key) > 0);
k = bch2_btree_node_iter_peek_all(node_iter, b);
if (k && !bkey_cmp_packed(b, k, &insert->k)) {
BUG_ON(bkey_whiteout(k));
if (!bkey_written(b, k) &&
bkey_val_u64s(&insert->k) == bkeyp_val_u64s(f, k) &&
!bkey_whiteout(&insert->k)) {
k->type = insert->k.type;
memcpy_u64s(bkeyp_val(f, k), &insert->v,
bkey_val_u64s(&insert->k));
return true;
}
insert->k.needs_whiteout = k->needs_whiteout;
btree_account_key_drop(b, k);
if (k >= btree_bset_last(b)->start) {
clobber_u64s = k->u64s;
/*
* If we're deleting, and the key we're deleting doesn't
* need a whiteout (it wasn't overwriting a key that had
* been written to disk) - just delete it:
*/
if (bkey_whiteout(&insert->k) && !k->needs_whiteout) {
bch2_bset_delete(b, k, clobber_u64s);
bch2_btree_node_iter_fix(iter, b, node_iter,
k, clobber_u64s, 0);
bch2_btree_iter_verify(iter, b);
return true;
}
goto overwrite;
}
k->type = KEY_TYPE_deleted;
bch2_btree_node_iter_fix(iter, b, node_iter, k,
k->u64s, k->u64s);
bch2_btree_iter_verify(iter, b);
if (bkey_whiteout(&insert->k)) {
reserve_whiteout(b, k);
return true;
} else {
k->needs_whiteout = false;
}
} else {
/*
* Deleting, but the key to delete wasn't found - nothing to do:
*/
if (bkey_whiteout(&insert->k))
return false;
insert->k.needs_whiteout = false;
}
k = bch2_btree_node_iter_bset_pos(node_iter, b, bset_tree_last(b));
clobber_u64s = 0;
overwrite:
bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
if (k->u64s != clobber_u64s || bkey_whiteout(&insert->k))
bch2_btree_node_iter_fix(iter, b, node_iter, k,
clobber_u64s, k->u64s);
bch2_btree_iter_verify(iter, b);
return true;
}
static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
unsigned i, u64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct btree_write *w = container_of(pin, struct btree_write, journal);
struct btree *b = container_of(w, struct btree, writes[i]);
btree_node_lock_type(c, b, SIX_LOCK_read);
bch2_btree_node_write_cond(c, b,
(btree_current_write(b) == w && w->journal.seq == seq));
six_unlock_read(&b->lock);
}
static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
return __btree_node_flush(j, pin, 0, seq);
}
static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
{
return __btree_node_flush(j, pin, 1, seq);
}
static inline void __btree_journal_key(struct btree_insert *trans,
enum btree_id btree_id,
struct bkey_i *insert)
{
struct journal *j = &trans->c->journal;
u64 seq = trans->journal_res.seq;
bool needs_whiteout = insert->k.needs_whiteout;
/* ick */
insert->k.needs_whiteout = false;
bch2_journal_add_keys(j, &trans->journal_res,
btree_id, insert);
insert->k.needs_whiteout = needs_whiteout;
bch2_journal_set_has_inode(j, &trans->journal_res,
insert->k.p.inode);
if (trans->journal_seq)
*trans->journal_seq = seq;
}
void bch2_btree_journal_key(struct btree_insert *trans,
struct btree_iter *iter,
struct bkey_i *insert)
{
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
struct btree *b = iter->l[0].b;
struct btree_write *w = btree_current_write(b);
EBUG_ON(iter->level || b->level);
EBUG_ON(trans->journal_res.ref !=
!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY));
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
__btree_journal_key(trans, iter->btree_id, insert);
btree_bset_last(b)->journal_seq =
cpu_to_le64(trans->journal_res.seq);
}
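	/*
	 * If the node's current write isn't pinning a journal entry yet,
	 * pin it at this update's sequence number - journal reclaim will
	 * then call btree_node_flush0/1() to write out the node and release
	 * the pin when it needs the journal space back:
	 */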
if (unlikely(!journal_pin_active(&w->journal))) {
u64 seq = likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
? trans->journal_res.seq
: j->replay_journal_seq;
bch2_journal_pin_add(j, seq, &w->journal,
btree_node_write_idx(b) == 0
? btree_node_flush0
: btree_node_flush1);
}
if (unlikely(!btree_node_dirty(b)))
set_btree_node_dirty(b);
}
static enum btree_insert_ret
bch2_insert_fixup_key(struct btree_insert *trans,
struct btree_insert_entry *insert)
{
struct btree_iter *iter = insert->iter;
struct btree_iter_level *l = &iter->l[0];
EBUG_ON(iter->level);
EBUG_ON(insert->k->k.u64s >
bch_btree_keys_u64s_remaining(trans->c, l->b));
if (bch2_btree_bset_insert_key(iter, l->b, &l->iter,
insert->k))
bch2_btree_journal_key(trans, iter, insert->k);
return BTREE_INSERT_OK;
}
/**
 * btree_insert_key_leaf - insert a key into a leaf node
*/
static enum btree_insert_ret
btree_insert_key_leaf(struct btree_insert *trans,
struct btree_insert_entry *insert)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter = insert->iter;
struct btree *b = iter->l[0].b;
enum btree_insert_ret ret;
int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
bch2_mark_update(trans, insert);
ret = !btree_node_is_extents(b)
? bch2_insert_fixup_key(trans, insert)
: bch2_insert_fixup_extent(trans, insert);
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
if (b->sib_u64s[0] != U16_MAX && live_u64s_added < 0)
b->sib_u64s[0] = max(0, (int) b->sib_u64s[0] + live_u64s_added);
if (b->sib_u64s[1] != U16_MAX && live_u64s_added < 0)
b->sib_u64s[1] = max(0, (int) b->sib_u64s[1] + live_u64s_added);
if (u64s_added > live_u64s_added &&
bch2_maybe_compact_whiteouts(c, b))
bch2_btree_iter_reinit_node(iter, b);
trace_btree_insert_key(c, b, insert->k);
return ret;
}
/* Deferred btree updates: */
static void deferred_update_flush(struct journal *j,
struct journal_entry_pin *pin,
u64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct deferred_update *d =
container_of(pin, struct deferred_update, journal);
u64 tmp[32];
struct bkey_i *k = (void *) tmp;
unsigned gen;
int ret;
if (d->allocated_u64s > ARRAY_SIZE(tmp)) {
k = kmalloc(d->allocated_u64s * sizeof(u64), GFP_NOFS);
BUG_ON(!k); /* XXX */
}
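	/*
	 * Copy the pending key to a stable buffer under d->lock, insert it
	 * through the normal btree update path, then drop the journal pin -
	 * but only if d->gen hasn't changed, i.e. no newer deferred update
	 * raced in while d->lock was dropped:
	 */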
spin_lock(&d->lock);
gen = d->gen;
if (journal_pin_active(&d->journal)) {
BUG_ON(d->k.k.u64s > d->allocated_u64s);
bkey_copy(k, &d->k);
spin_unlock(&d->lock);
ret = bch2_btree_insert(c, d->btree_id, k, NULL, NULL,
BTREE_INSERT_NOFAIL);
bch2_fs_fatal_err_on(ret && !bch2_journal_error(j),
c, "error flushing deferred btree update: %i", ret);
spin_lock(&d->lock);
}
if (gen == d->gen)
bch2_journal_pin_drop(j, &d->journal);
spin_unlock(&d->lock);
if (k != (void *) tmp)
kfree(k);
}
static enum btree_insert_ret
btree_insert_key_deferred(struct btree_insert *trans,
struct btree_insert_entry *insert)
{
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
struct deferred_update *d = insert->d;
BUG_ON(trans->flags & BTREE_INSERT_JOURNAL_REPLAY);
BUG_ON(insert->k->u64s > d->allocated_u64s);
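	/*
	 * Deferred updates only journal the key and stash it in the
	 * deferred_update; the btree itself is updated later, when
	 * deferred_update_flush() runs via journal reclaim or
	 * bch2_deferred_update_free():
	 */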
__btree_journal_key(trans, d->btree_id, insert->k);
spin_lock(&d->lock);
d->gen++;
bkey_copy(&d->k, insert->k);
spin_unlock(&d->lock);
bch2_journal_pin_update(j, trans->journal_res.seq, &d->journal,
deferred_update_flush);
return BTREE_INSERT_OK;
}
void bch2_deferred_update_free(struct bch_fs *c,
struct deferred_update *d)
{
deferred_update_flush(&c->journal, &d->journal, 0);
BUG_ON(journal_pin_active(&d->journal));
bch2_journal_pin_flush(&c->journal, &d->journal);
kfree(d);
}
struct deferred_update *
bch2_deferred_update_alloc(struct bch_fs *c,
enum btree_id btree_id,
unsigned u64s)
{
struct deferred_update *d;
BUG_ON(u64s > U8_MAX);
d = kmalloc(offsetof(struct deferred_update, k) +
u64s * sizeof(u64), GFP_NOFS);
BUG_ON(!d);
memset(d, 0, offsetof(struct deferred_update, k));
spin_lock_init(&d->lock);
d->allocated_u64s = u64s;
d->btree_id = btree_id;
return d;
}
/* struct btree_insert operations: */
/*
* We sort transaction entries so that if multiple iterators point to the same
* leaf node they'll be adjacent:
*/
static bool same_leaf_as_prev(struct btree_insert *trans,
struct btree_insert_entry *i)
{
return i != trans->entries &&
!i->deferred &&
i[0].iter->l[0].b == i[-1].iter->l[0].b;
}
#define __trans_next_entry(_trans, _i, _filter) \
({ \
while ((_i) < (_trans)->entries + (_trans->nr) && !(_filter)) \
(_i)++; \
\
(_i) < (_trans)->entries + (_trans->nr); \
})
#define __trans_for_each_entry(_trans, _i, _filter) \
for ((_i) = (_trans)->entries; \
__trans_next_entry(_trans, _i, _filter); \
(_i)++)
#define trans_for_each_entry(trans, i) \
__trans_for_each_entry(trans, i, true)
#define trans_for_each_iter(trans, i) \
__trans_for_each_entry(trans, i, !(i)->deferred)
#define trans_for_each_leaf(trans, i) \
__trans_for_each_entry(trans, i, !(i)->deferred && \
!same_leaf_as_prev(trans, i))
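/*
 * Example: if the sorted entries are { iter A (leaf X), iter B (leaf X),
 * iter C (leaf Y), deferred D }, then trans_for_each_entry() visits all
 * four, trans_for_each_iter() visits A, B and C, and trans_for_each_leaf()
 * visits only A and C - one entry per distinct leaf node.
 */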
inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
struct btree_iter *iter)
{
bch2_btree_node_lock_write(b, iter);
if (btree_node_just_written(b) &&
bch2_btree_post_write_cleanup(c, b))
bch2_btree_iter_reinit_node(iter, b);
/*
* If the last bset has been written, or if it's gotten too big - start
* a new bset to insert into:
*/
if (want_new_bset(c, b))
bch2_btree_init_next(c, b, iter);
}
static void multi_lock_write(struct bch_fs *c, struct btree_insert *trans)
{
struct btree_insert_entry *i;
trans_for_each_leaf(trans, i)
bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
}
static void multi_unlock_write(struct btree_insert *trans)
{
struct btree_insert_entry *i;
trans_for_each_leaf(trans, i)
bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
}
static inline int btree_trans_cmp(struct btree_insert_entry l,
struct btree_insert_entry r)
{
return (l.deferred > r.deferred) - (l.deferred < r.deferred) ?:
btree_iter_cmp(l.iter, r.iter);
}
/* Normal update interface: */
static enum btree_insert_ret
btree_key_can_insert(struct btree_insert *trans,
struct btree_insert_entry *insert,
unsigned *u64s)
{
struct bch_fs *c = trans->c;
struct btree *b = insert->iter->l[0].b;
	enum btree_insert_ret ret;
if (unlikely(btree_node_fake(b)))
return BTREE_INSERT_BTREE_NODE_FULL;
if (!bch2_bkey_replicas_marked(c,
bkey_i_to_s_c(insert->k),
true))
return BTREE_INSERT_NEED_MARK_REPLICAS;
ret = !btree_node_is_extents(b)
? BTREE_INSERT_OK
: bch2_extent_can_insert(trans, insert, u64s);
if (ret)
return ret;
if (*u64s > bch_btree_keys_u64s_remaining(c, b))
return BTREE_INSERT_BTREE_NODE_FULL;
return BTREE_INSERT_OK;
}
static inline enum btree_insert_ret
do_btree_insert_one(struct btree_insert *trans,
struct btree_insert_entry *insert)
{
return likely(!insert->deferred)
? btree_insert_key_leaf(trans, insert)
: btree_insert_key_deferred(trans, insert);
}
/*
* Get journal reservation, take write locks, and attempt to do btree update(s):
*/
static inline int do_btree_insert_at(struct btree_insert *trans,
struct btree_insert_entry **stopped_at)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
struct btree_iter *linked;
unsigned u64s;
int ret;
trans_for_each_iter(trans, i)
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
	/*
	 * XXX: reserve journal space for deferred updates up front - not yet
	 * implemented (see commit message); without it we can deadlock with
	 * the journal full:
	 */
	__trans_for_each_entry(trans, i, i->deferred) {
	}
memset(&trans->journal_res, 0, sizeof(trans->journal_res));
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
u64s = 0;
trans_for_each_entry(trans, i)
u64s += jset_u64s(i->k->k.u64s);
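		/*
		 * Don't block on the journal reservation while holding
		 * btree node locks: if the nonblocking get fails, drop our
		 * locks, wait for space (GET_CHECK only checks that a
		 * reservation of this size could succeed, without taking
		 * it), then relock and retry the nonblocking get:
		 */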
while ((ret = bch2_journal_res_get(&c->journal,
&trans->journal_res, u64s,
JOURNAL_RES_GET_NONBLOCK)) == -EAGAIN) {
struct btree_iter *iter = NULL;
trans_for_each_iter(trans, i)
iter = i->iter;
if (iter)
bch2_btree_iter_unlock(iter);
ret = bch2_journal_res_get(&c->journal,
&trans->journal_res, u64s,
JOURNAL_RES_GET_CHECK);
if (ret)
return ret;
if (iter && !bch2_btree_iter_relock(iter)) {
trans_restart(" (iter relock after journal res get blocked)");
return -EINTR;
}
}
if (ret)
return ret;
}
multi_lock_write(c, trans);
if (race_fault()) {
ret = -EINTR;
trans_restart(" (race)");
goto out;
}
/*
* Check if the insert will fit in the leaf node with the write lock
* held, otherwise another thread could write the node changing the
* amount of space available:
*/
u64s = 0;
trans_for_each_iter(trans, i) {
/* Multiple inserts might go to same leaf: */
if (!same_leaf_as_prev(trans, i))
u64s = 0;
u64s += i->k->k.u64s;
ret = btree_key_can_insert(trans, i, &u64s);
if (ret) {
*stopped_at = i;
goto out;
}
}
if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
if (journal_seq_verify(c))
trans_for_each_entry(trans, i)
i->k->k.version.lo = trans->journal_res.seq;
else if (inject_invalid_keys(c))
trans_for_each_entry(trans, i)
i->k->k.version = MAX_VERSION;
}
if (trans->flags & BTREE_INSERT_NOUNLOCK) {
/*
* linked iterators that weren't being updated may or may not
* have been traversed/locked, depending on what the caller was
* doing:
*/
trans_for_each_iter(trans, i) {
for_each_btree_iter(i->iter, linked)
if (linked->uptodate < BTREE_ITER_NEED_RELOCK)
linked->flags |= BTREE_ITER_NOUNLOCK;
break;
}
}
trans->did_work = true;
trans_for_each_entry(trans, i) {
switch (do_btree_insert_one(trans, i)) {
case BTREE_INSERT_OK:
break;
case BTREE_INSERT_NEED_TRAVERSE:
BUG_ON((trans->flags &
(BTREE_INSERT_ATOMIC|BTREE_INSERT_NOUNLOCK)));
ret = -EINTR;
goto out;
default:
BUG();
}
}
out:
multi_unlock_write(trans);
bch2_journal_res_put(&c->journal, &trans->journal_res);
return ret;
}
static inline void btree_insert_entry_checks(struct bch_fs *c,
struct btree_insert_entry *i)
{
enum btree_id btree_id = !i->deferred
? i->iter->btree_id
: i->d->btree_id;
if (!i->deferred) {
BUG_ON(i->iter->level);
BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
bch2_btree_iter_verify_locks(i->iter);
}
BUG_ON(debug_check_bkeys(c) &&
!bkey_deleted(&i->k->k) &&
bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), btree_id));
}
/**
 * __bch2_btree_insert_at - insert keys at given iterator positions
 *
 * This is the main entry point for btree updates.
*
* Return values:
* -EINTR: locking changed, this function should be called again. Only returned
* if passed BTREE_INSERT_ATOMIC.
* -EROFS: filesystem read only
* -EIO: journal or btree node IO error
*/
int __bch2_btree_insert_at(struct btree_insert *trans)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
struct btree_iter *linked;
unsigned flags;
int ret;
BUG_ON(!trans->nr);
/* for the sake of sanity: */
BUG_ON(trans->nr > 1 && !(trans->flags & BTREE_INSERT_ATOMIC));
bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
trans_for_each_entry(trans, i)
btree_insert_entry_checks(c, i);
if (unlikely(!percpu_ref_tryget(&c->writes)))
return -EROFS;
retry:
trans_for_each_iter(trans, i) {
unsigned old_locks_want = i->iter->locks_want;
unsigned old_uptodate = i->iter->uptodate;
if (!bch2_btree_iter_upgrade(i->iter, 1, true)) {
trans_restart(" (failed upgrade, locks_want %u uptodate %u)",
old_locks_want, old_uptodate);
ret = -EINTR;
goto err;
}
if (i->iter->flags & BTREE_ITER_ERROR) {
ret = -EIO;
goto err;
}
}
ret = do_btree_insert_at(trans, &i);
if (unlikely(ret))
goto err;
trans_for_each_leaf(trans, i)
bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
trans_for_each_iter(trans, i)
bch2_btree_iter_downgrade(i->iter);
out:
percpu_ref_put(&c->writes);
/* make sure we didn't drop or screw up locks: */
trans_for_each_iter(trans, i) {
bch2_btree_iter_verify_locks(i->iter);
break;
}
trans_for_each_iter(trans, i) {
for_each_btree_iter(i->iter, linked)
linked->flags &= ~BTREE_ITER_NOUNLOCK;
break;
}
BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
return ret;
err:
flags = trans->flags;
/*
* BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
* update; if we haven't done anything yet it doesn't apply
*/
if (!trans->did_work)
flags &= ~BTREE_INSERT_NOUNLOCK;
switch (ret) {
case BTREE_INSERT_BTREE_NODE_FULL:
ret = bch2_btree_split_leaf(c, i->iter, flags);
/*
* if the split succeeded without dropping locks the insert will
* still be atomic (in the BTREE_INSERT_ATOMIC sense, what the
* caller peeked() and is overwriting won't have changed)
*/
#if 0
/*
* XXX:
* split -> btree node merging (of parent node) might still drop
* locks when we're not passing it BTREE_INSERT_NOUNLOCK
*/
if (!ret && !trans->did_work)
goto retry;
#endif
/*
* don't care if we got ENOSPC because we told split it
* couldn't block:
*/
if (!ret || (flags & BTREE_INSERT_NOUNLOCK)) {
trans_restart(" (split)");
ret = -EINTR;
}
break;
case BTREE_INSERT_NEED_GC_LOCK:
ret = -EINTR;
if (!down_read_trylock(&c->gc_lock)) {
if (flags & BTREE_INSERT_NOUNLOCK)
goto out;
bch2_btree_iter_unlock(trans->entries[0].iter);
down_read(&c->gc_lock);
}
up_read(&c->gc_lock);
break;
case BTREE_INSERT_ENOSPC:
ret = -ENOSPC;
break;
case BTREE_INSERT_NEED_MARK_REPLICAS:
if (flags & BTREE_INSERT_NOUNLOCK) {
ret = -EINTR;
goto out;
}
bch2_btree_iter_unlock(trans->entries[0].iter);
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k))
?: -EINTR;
break;
default:
BUG_ON(ret >= 0);
break;
}
if (ret == -EINTR) {
if (flags & BTREE_INSERT_NOUNLOCK) {
trans_restart(" (can't unlock)");
goto out;
}
trans_for_each_iter(trans, i) {
int ret2 = bch2_btree_iter_traverse(i->iter);
if (ret2) {
ret = ret2;
trans_restart(" (traverse)");
goto out;
}
BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK);
}
/*
 * BTREE_INSERT_ATOMIC means we have to return -EINTR if we
* dropped locks:
*/
if (!(flags & BTREE_INSERT_ATOMIC))
goto retry;
trans_restart(" (atomic)");
}
goto out;
}
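/*
 * Rough sketch of the calling convention the -EINTR contract implies
 * (illustrative only - the iterator, key and journal_seq here are
 * placeholders, not taken from this file): with BTREE_INSERT_ATOMIC the
 * caller revalidates whatever it peeked and retries:
 *
 *	do {
 *		k = bch2_btree_iter_peek(&iter);
 *		... recheck/recompute the update against k ...
 *
 *		ret = bch2_btree_insert_at(c, disk_res, &journal_seq,
 *					   BTREE_INSERT_ATOMIC,
 *					   BTREE_INSERT_ENTRY(&iter, &new_key));
 *	} while (ret == -EINTR);
 */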
int bch2_trans_commit(struct btree_trans *trans,
struct disk_reservation *disk_res,
u64 *journal_seq,
unsigned flags)
{
struct btree_insert insert = {
.c = trans->c,
.disk_res = disk_res,
.journal_seq = journal_seq,
.flags = flags,
.nr = trans->nr_updates,
.entries = trans->updates,
};
if (!trans->nr_updates)
return 0;
trans->nr_updates = 0;
return __bch2_btree_insert_at(&insert);
}
int bch2_btree_delete_at(struct btree_iter *iter, unsigned flags)
{
struct bkey_i k;
bkey_init(&k.k);
k.k.p = iter->pos;
return bch2_btree_insert_at(iter->c, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|flags,
BTREE_INSERT_ENTRY(iter, &k));
}
int bch2_btree_insert_list_at(struct btree_iter *iter,
struct keylist *keys,
struct disk_reservation *disk_res,
u64 *journal_seq, unsigned flags)
{
BUG_ON(flags & BTREE_INSERT_ATOMIC);
BUG_ON(bch2_keylist_empty(keys));
bch2_verify_keylist_sorted(keys);
while (!bch2_keylist_empty(keys)) {
int ret = bch2_btree_insert_at(iter->c, disk_res,
journal_seq, flags,
BTREE_INSERT_ENTRY(iter, bch2_keylist_front(keys)));
if (ret)
return ret;
bch2_keylist_pop_front(keys);
}
return 0;
}
/**
 * bch2_btree_insert - insert a single key into a btree
 * @c:		pointer to struct bch_fs
 * @id:		btree to insert into
 * @k:		key to insert
 * @disk_res:	disk reservation, or NULL
 * @journal_seq: if non NULL, filled in with the journal sequence number of
 *		the update
 * @flags:	BTREE_INSERT_* flags
*/
int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
struct bkey_i *k,
struct disk_reservation *disk_res,
u64 *journal_seq, int flags)
{
struct btree_iter iter;
int ret;
bch2_btree_iter_init(&iter, c, id, bkey_start_pos(&k->k),
BTREE_ITER_INTENT);
ret = bch2_btree_insert_at(c, disk_res, journal_seq, flags,
BTREE_INSERT_ENTRY(&iter, k));
bch2_btree_iter_unlock(&iter);
return ret;
}
/*
 * bch2_btree_delete_range - delete everything within a given range
*
* Range is a half open interval - [start, end)
*/
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
struct bpos start, struct bpos end,
u64 *journal_seq)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
bch2_btree_iter_init(&iter, c, id, start,
BTREE_ITER_INTENT);
while ((k = bch2_btree_iter_peek(&iter)).k &&
!(ret = btree_iter_err(k)) &&
bkey_cmp(iter.pos, end) < 0) {
unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
/* really shouldn't be using a bare, unpadded bkey_i */
struct bkey_i delete;
bkey_init(&delete.k);
/*
* For extents, iter.pos won't necessarily be the same as
* bkey_start_pos(k.k) (for non extents they always will be the
* same). It's important that we delete starting from iter.pos
* because the range we want to delete could start in the middle
* of k.
*
* (bch2_btree_iter_peek() does guarantee that iter.pos >=
* bkey_start_pos(k.k)).
*/
delete.k.p = iter.pos;
if (iter.flags & BTREE_ITER_IS_EXTENTS) {
/* create the biggest key we can */
bch2_key_resize(&delete.k, max_sectors);
bch2_cut_back(end, &delete.k);
}
ret = bch2_btree_insert_at(c, NULL, journal_seq,
BTREE_INSERT_NOFAIL,
BTREE_INSERT_ENTRY(&iter, &delete));
if (ret)
break;
bch2_btree_iter_cond_resched(&iter);
}
bch2_btree_iter_unlock(&iter);
return ret;
}
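/*
 * Example (hypothetical caller - the inode number and journal_seq are
 * placeholders): since the range is half open, deleting every extent
 * belonging to inode 42 passes the start of inode 43 as the end:
 *
 *	ret = bch2_btree_delete_range(c, BTREE_ID_EXTENTS,
 *				      POS(42, 0), POS(43, 0),
 *				      &journal_seq);
 */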