mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-09-15 07:04:44 +00:00
bcachefs: Pipeline binary searches and linear searches
This makes prefetching for the linear search at the end of the lookup much more effective, and is a couple percent speedup. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
fab4f8c653
commit
c45376866a
1 changed files with 69 additions and 45 deletions
|
@ -1326,6 +1326,25 @@ static int bset_search_tree_slowpath(const struct btree *b,
|
|||
packed_search, search) < 0;
|
||||
}
|
||||
|
||||
static inline void prefetch_four_cachelines(void *p)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
asm(".intel_syntax noprefix;"
|
||||
"prefetcht0 [%0 - 127 + 64 * 0];"
|
||||
"prefetcht0 [%0 - 127 + 64 * 1];"
|
||||
"prefetcht0 [%0 - 127 + 64 * 2];"
|
||||
"prefetcht0 [%0 - 127 + 64 * 3];"
|
||||
".att_syntax prefix;"
|
||||
:
|
||||
: "r" (p + 127));
|
||||
#else
|
||||
prefetch(p + L1_CACHE_BYTES * 0);
|
||||
prefetch(p + L1_CACHE_BYTES * 1);
|
||||
prefetch(p + L1_CACHE_BYTES * 2);
|
||||
prefetch(p + L1_CACHE_BYTES * 3);
|
||||
#endif
|
||||
}
|
||||
|
||||
__flatten
|
||||
static struct bkey_packed *bset_search_tree(const struct btree *b,
|
||||
struct bset_tree *t,
|
||||
|
@ -1333,34 +1352,12 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
|
|||
const struct bkey_packed *packed_search)
|
||||
{
|
||||
struct ro_aux_tree *base = ro_aux_tree_base(b, t);
|
||||
struct bkey_float *f = bkey_float_get(base, 1);
|
||||
void *p;
|
||||
struct bkey_float *f;
|
||||
unsigned inorder, n = 1;
|
||||
|
||||
while (1) {
|
||||
if (likely(n << 4 < t->size)) {
|
||||
p = bkey_float_get(base, n << 4);
|
||||
prefetch(p);
|
||||
} else if (n << 3 < t->size) {
|
||||
inorder = __eytzinger1_to_inorder(n, t->size, t->extra);
|
||||
p = bset_cacheline(b, t, inorder);
|
||||
#ifdef CONFIG_X86_64
|
||||
asm(".intel_syntax noprefix;"
|
||||
"prefetcht0 [%0 - 127 + 64 * 0];"
|
||||
"prefetcht0 [%0 - 127 + 64 * 1];"
|
||||
"prefetcht0 [%0 - 127 + 64 * 2];"
|
||||
"prefetcht0 [%0 - 127 + 64 * 3];"
|
||||
".att_syntax prefix;"
|
||||
:
|
||||
: "r" (p + 127));
|
||||
#else
|
||||
prefetch(p + L1_CACHE_BYTES * 0);
|
||||
prefetch(p + L1_CACHE_BYTES * 1);
|
||||
prefetch(p + L1_CACHE_BYTES * 2);
|
||||
prefetch(p + L1_CACHE_BYTES * 3);
|
||||
#endif
|
||||
} else if (n >= t->size)
|
||||
break;
|
||||
do {
|
||||
if (likely(n << 4 < t->size))
|
||||
prefetch(bkey_float_get(base, n << 4));
|
||||
|
||||
f = bkey_float_get(base, n);
|
||||
|
||||
|
@ -1391,17 +1388,12 @@ static struct bkey_packed *bset_search_tree(const struct btree *b,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the first key greater than or equal to @search
|
||||
*/
|
||||
__always_inline __flatten
|
||||
static struct bkey_packed *bch2_bset_search(struct btree *b,
|
||||
static __always_inline __flatten
|
||||
struct bkey_packed *__bch2_bset_search(struct btree *b,
|
||||
struct bset_tree *t,
|
||||
struct bpos *search,
|
||||
struct bkey_packed *packed_search,
|
||||
const struct bkey_packed *lossy_packed_search)
|
||||
{
|
||||
struct bkey_packed *m;
|
||||
|
||||
/*
|
||||
* First, we search for a cacheline, then lastly we do a linear search
|
||||
|
@ -1420,11 +1412,9 @@ static struct bkey_packed *bch2_bset_search(struct btree *b,
|
|||
|
||||
switch (bset_aux_tree_type(t)) {
|
||||
case BSET_NO_AUX_TREE:
|
||||
m = btree_bkey_first(b, t);
|
||||
break;
|
||||
return btree_bkey_first(b, t);
|
||||
case BSET_RW_AUX_TREE:
|
||||
m = bset_search_write_set(b, t, search, lossy_packed_search);
|
||||
break;
|
||||
return bset_search_write_set(b, t, search, lossy_packed_search);
|
||||
case BSET_RO_AUX_TREE:
|
||||
/*
|
||||
* Each node in the auxiliary search tree covers a certain range
|
||||
|
@ -1436,10 +1426,20 @@ static struct bkey_packed *bch2_bset_search(struct btree *b,
|
|||
if (bkey_cmp(*search, t->max_key) > 0)
|
||||
return btree_bkey_last(b, t);
|
||||
|
||||
m = bset_search_tree(b, t, search, lossy_packed_search);
|
||||
break;
|
||||
return bset_search_tree(b, t, search, lossy_packed_search);
|
||||
default:
|
||||
unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
static __always_inline __flatten
|
||||
struct bkey_packed *bch2_bset_search_linear(struct btree *b,
|
||||
struct bset_tree *t,
|
||||
struct bpos *search,
|
||||
struct bkey_packed *packed_search,
|
||||
const struct bkey_packed *lossy_packed_search,
|
||||
struct bkey_packed *m)
|
||||
{
|
||||
if (lossy_packed_search)
|
||||
while (m != btree_bkey_last(b, t) &&
|
||||
bkey_iter_cmp_p_or_unp(b, search, lossy_packed_search,
|
||||
|
@ -1462,6 +1462,23 @@ static struct bkey_packed *bch2_bset_search(struct btree *b,
|
|||
return m;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the first key greater than or equal to @search
|
||||
*/
|
||||
static __always_inline __flatten
|
||||
struct bkey_packed *bch2_bset_search(struct btree *b,
|
||||
struct bset_tree *t,
|
||||
struct bpos *search,
|
||||
struct bkey_packed *packed_search,
|
||||
const struct bkey_packed *lossy_packed_search)
|
||||
{
|
||||
struct bkey_packed *m = __bch2_bset_search(b, t, search,
|
||||
lossy_packed_search);
|
||||
|
||||
return bch2_bset_search_linear(b, t, search,
|
||||
packed_search, lossy_packed_search, m);
|
||||
}
|
||||
|
||||
/* Btree node iterator */
|
||||
|
||||
static inline void __bch2_btree_node_iter_push(struct btree_node_iter *iter,
|
||||
|
@ -1552,9 +1569,10 @@ __flatten
|
|||
void bch2_btree_node_iter_init(struct btree_node_iter *iter,
|
||||
struct btree *b, struct bpos *search)
|
||||
{
|
||||
struct bset_tree *t;
|
||||
struct bkey_packed p, *packed_search = NULL;
|
||||
struct btree_node_iter_set *pos = iter->data;
|
||||
struct bkey_packed *k[MAX_BSETS];
|
||||
unsigned i;
|
||||
|
||||
EBUG_ON(bkey_cmp(*search, b->data->min_key) < 0);
|
||||
bset_aux_tree_verify(b);
|
||||
|
@ -1573,14 +1591,20 @@ void bch2_btree_node_iter_init(struct btree_node_iter *iter,
|
|||
return;
|
||||
}
|
||||
|
||||
for_each_bset(b, t) {
|
||||
struct bkey_packed *k = bch2_bset_search(b, t, search,
|
||||
packed_search, &p);
|
||||
for (i = 0; i < b->nsets; i++) {
|
||||
k[i] = __bch2_bset_search(b, b->set + i, search, &p);
|
||||
prefetch_four_cachelines(k[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < b->nsets; i++) {
|
||||
struct bset_tree *t = b->set + i;
|
||||
struct bkey_packed *end = btree_bkey_last(b, t);
|
||||
|
||||
if (k != end)
|
||||
k[i] = bch2_bset_search_linear(b, t, search,
|
||||
packed_search, &p, k[i]);
|
||||
if (k[i] != end)
|
||||
*pos++ = (struct btree_node_iter_set) {
|
||||
__btree_node_key_to_offset(b, k),
|
||||
__btree_node_key_to_offset(b, k[i]),
|
||||
__btree_node_key_to_offset(b, end)
|
||||
};
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue