bcachefs: pull out time_stats.[ch]

prep work for lifting out of fs/bcachefs/

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2024-03-13 20:16:40 -04:00
parent cdce109431
commit f1ca1abfb0
11 changed files with 324 additions and 277 deletions

View File

@ -82,6 +82,7 @@ bcachefs-y := \
super-io.o \
sysfs.o \
tests.o \
time_stats.o \
thread_with_file.o \
trace.o \
two_state_shared_lock.o \

View File

@ -236,8 +236,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
if (cl)
closure_wait(&c->open_buckets_wait, cl);
track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket],
&c->blocked_allocate_open_bucket, true);
track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true);
spin_unlock(&c->freelist_lock);
return ERR_PTR(-BCH_ERR_open_buckets_empty);
}
@ -263,11 +262,8 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
ca->nr_open_buckets++;
bch2_open_bucket_hash_add(c, ob);
track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket],
&c->blocked_allocate_open_bucket, false);
track_event_change(&c->times[BCH_TIME_blocked_allocate],
&c->blocked_allocate, false);
track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], false);
track_event_change(&c->times[BCH_TIME_blocked_allocate], false);
spin_unlock(&c->freelist_lock);
return ob;
@ -555,8 +551,7 @@ again:
goto again;
}
track_event_change(&c->times[BCH_TIME_blocked_allocate],
&c->blocked_allocate, true);
track_event_change(&c->times[BCH_TIME_blocked_allocate], true);
ob = ERR_PTR(-BCH_ERR_freelist_empty);
goto err;

View File

@ -212,6 +212,7 @@
#include "recovery_types.h"
#include "sb-errors_types.h"
#include "seqmutex.h"
#include "time_stats.h"
#include "util.h"
#ifdef CONFIG_BCACHEFS_DEBUG
@ -924,8 +925,6 @@ struct bch_fs {
/* ALLOCATOR */
spinlock_t freelist_lock;
struct closure_waitlist freelist_wait;
u64 blocked_allocate;
u64 blocked_allocate_open_bucket;
open_bucket_idx_t open_buckets_freelist;
open_bucket_idx_t open_buckets_nr_free;

View File

@ -566,8 +566,7 @@ out:
ret = -BCH_ERR_journal_res_get_blocked;
if (ret == JOURNAL_ERR_max_in_flight &&
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
&j->max_in_flight_start, true)) {
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true)) {
struct printbuf buf = PRINTBUF;
prt_printf(&buf, "seq %llu\n", journal_cur_seq(j));

View File

@ -1680,8 +1680,7 @@ static CLOSURE_CALLBACK(journal_write_done)
bch2_journal_reclaim_fast(j);
bch2_journal_space_available(j);
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight],
&j->max_in_flight_start, false);
track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], false);
journal_wake(j);
}

View File

@ -62,12 +62,9 @@ void bch2_journal_set_watermark(struct journal *j)
? BCH_WATERMARK_reclaim
: BCH_WATERMARK_stripe;
if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space],
&j->low_on_space_start, low_on_space) ||
track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin],
&j->low_on_pin_start, low_on_pin) ||
track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full],
&j->write_buffer_full_start, low_on_wb))
if (track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_space], low_on_space) ||
track_event_change(&c->times[BCH_TIME_blocked_journal_low_on_pin], low_on_pin) ||
track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
trace_and_count(c, journal_full, c);
swap(watermark, j->watermark);

View File

@ -287,11 +287,6 @@ struct journal {
u64 nr_noflush_writes;
u64 entry_bytes_written;
u64 low_on_space_start;
u64 low_on_pin_start;
u64 max_in_flight_start;
u64 write_buffer_full_start;
struct bch2_time_stats *flush_write_time;
struct bch2_time_stats *noflush_write_time;
struct bch2_time_stats *flush_seq_time;

163
fs/bcachefs/time_stats.c Normal file
View File

@ -0,0 +1,163 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/time.h>
#include <linux/spinlock.h>
#include "eytzinger.h"
#include "time_stats.h"
static const struct time_unit time_units[] = {
{ "ns", 1 },
{ "us", NSEC_PER_USEC },
{ "ms", NSEC_PER_MSEC },
{ "s", NSEC_PER_SEC },
{ "m", (u64) NSEC_PER_SEC * 60},
{ "h", (u64) NSEC_PER_SEC * 3600},
{ "eon", U64_MAX },
};
const struct time_unit *bch2_pick_time_units(u64 ns)
{
const struct time_unit *u;
for (u = time_units;
u + 1 < time_units + ARRAY_SIZE(time_units) &&
ns >= u[1].nsecs << 1;
u++)
;
return u;
}
static void quantiles_update(struct quantiles *q, u64 v)
{
unsigned i = 0;
while (i < ARRAY_SIZE(q->entries)) {
struct quantile_entry *e = q->entries + i;
if (unlikely(!e->step)) {
e->m = v;
e->step = max_t(unsigned, v / 2, 1024);
} else if (e->m > v) {
e->m = e->m >= e->step
? e->m - e->step
: 0;
} else if (e->m < v) {
e->m = e->m + e->step > e->m
? e->m + e->step
: U32_MAX;
}
if ((e->m > v ? e->m - v : v - e->m) < e->step)
e->step = max_t(unsigned, e->step / 2, 1);
if (v >= e->m)
break;
i = eytzinger0_child(i, v > e->m);
}
}
static inline void time_stats_update_one(struct bch2_time_stats *stats,
u64 start, u64 end)
{
u64 duration, freq;
if (time_after64(end, start)) {
duration = end - start;
mean_and_variance_update(&stats->duration_stats, duration);
mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration);
stats->max_duration = max(stats->max_duration, duration);
stats->min_duration = min(stats->min_duration, duration);
stats->total_duration += duration;
if (stats->quantiles_enabled)
quantiles_update(&stats->quantiles, duration);
}
if (stats->last_event && time_after64(end, stats->last_event)) {
freq = end - stats->last_event;
mean_and_variance_update(&stats->freq_stats, freq);
mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq);
stats->max_freq = max(stats->max_freq, freq);
stats->min_freq = min(stats->min_freq, freq);
}
stats->last_event = end;
}
void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
struct time_stat_buffer *b)
{
for (struct time_stat_buffer_entry *i = b->entries;
i < b->entries + ARRAY_SIZE(b->entries);
i++)
time_stats_update_one(stats, i->start, i->end);
b->nr = 0;
}
static noinline void time_stats_clear_buffer(struct bch2_time_stats *stats,
struct time_stat_buffer *b)
{
unsigned long flags;
spin_lock_irqsave(&stats->lock, flags);
__bch2_time_stats_clear_buffer(stats, b);
spin_unlock_irqrestore(&stats->lock, flags);
}
void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
{
unsigned long flags;
WARN_ONCE(!stats->duration_stats_weighted.weight ||
!stats->freq_stats_weighted.weight,
"uninitialized bch2_time_stats");
if (!stats->buffer) {
spin_lock_irqsave(&stats->lock, flags);
time_stats_update_one(stats, start, end);
if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 &&
stats->duration_stats.n > 1024)
stats->buffer =
alloc_percpu_gfp(struct time_stat_buffer,
GFP_ATOMIC);
spin_unlock_irqrestore(&stats->lock, flags);
} else {
struct time_stat_buffer *b;
preempt_disable();
b = this_cpu_ptr(stats->buffer);
BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
b->entries[b->nr++] = (struct time_stat_buffer_entry) {
.start = start,
.end = end
};
if (unlikely(b->nr == ARRAY_SIZE(b->entries)))
time_stats_clear_buffer(stats, b);
preempt_enable();
}
}
void bch2_time_stats_exit(struct bch2_time_stats *stats)
{
free_percpu(stats->buffer);
}
void bch2_time_stats_init(struct bch2_time_stats *stats)
{
memset(stats, 0, sizeof(*stats));
stats->duration_stats_weighted.weight = 8;
stats->freq_stats_weighted.weight = 8;
stats->min_duration = U64_MAX;
stats->min_freq = U64_MAX;
spin_lock_init(&stats->lock);
}

132
fs/bcachefs/time_stats.h Normal file
View File

@ -0,0 +1,132 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* bch2_time_stats - collect statistics on events that have a duration, with nicely
* formatted textual output on demand
*
* - percpu buffering of event collection: cheap enough to shotgun
* everywhere without worrying about overhead
*
* tracks:
* - number of events
* - maximum event duration ever seen
* - sum of all event durations
* - average event duration, standard and weighted
* - standard deviation of event durations, standard and weighted
* and analagous statistics for the frequency of events
*
* We provide both mean and weighted mean (exponentially weighted), and standard
* deviation and weighted standard deviation, to give an efficient-to-compute
* view of current behaviour versus. average behaviour - "did this event source
* just become wonky, or is this typical?".
*
* Particularly useful for tracking down latency issues.
*/
#ifndef _BCACHEFS_TIME_STATS_H
#define _BCACHEFS_TIME_STATS_H
#include <linux/sched/clock.h>
#include <linux/spinlock_types.h>
#include "mean_and_variance.h"
struct time_unit {
const char *name;
u64 nsecs;
};
/*
* given a nanosecond value, pick the preferred time units for printing:
*/
const struct time_unit *bch2_pick_time_units(u64 ns);
/*
* quantiles - do not use:
*
* Only enabled if bch2_time_stats->quantiles_enabled has been manually set - don't
* use in new code.
*/
#define NR_QUANTILES 15
#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES)
#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
#define QUANTILE_LAST eytzinger0_last(NR_QUANTILES)
struct quantiles {
struct quantile_entry {
u64 m;
u64 step;
} entries[NR_QUANTILES];
};
struct time_stat_buffer {
unsigned nr;
struct time_stat_buffer_entry {
u64 start;
u64 end;
} entries[32];
};
struct bch2_time_stats {
spinlock_t lock;
bool quantiles_enabled;
/* all fields are in nanoseconds */
u64 min_duration;
u64 max_duration;
u64 total_duration;
u64 max_freq;
u64 min_freq;
u64 last_event;
u64 last_event_start;
struct quantiles quantiles;
struct mean_and_variance duration_stats;
struct mean_and_variance_weighted duration_stats_weighted;
struct mean_and_variance freq_stats;
struct mean_and_variance_weighted freq_stats_weighted;
struct time_stat_buffer __percpu *buffer;
};
void __bch2_time_stats_clear_buffer(struct bch2_time_stats *, struct time_stat_buffer *);
void __bch2_time_stats_update(struct bch2_time_stats *stats, u64, u64);
/**
* time_stats_update - collect a new event being tracked
*
* @stats - bch2_time_stats to update
* @start - start time of event, recorded with local_clock()
*
* The end duration of the event will be the current time
*/
static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start)
{
__bch2_time_stats_update(stats, start, local_clock());
}
/**
* track_event_change - track state change events
*
* @stats - bch2_time_stats to update
* @v - new state, true or false
*
* Use this when tracking time stats for state changes, i.e. resource X becoming
* blocked/unblocked.
*/
static inline bool track_event_change(struct bch2_time_stats *stats, bool v)
{
if (v != !!stats->last_event_start) {
if (!v) {
bch2_time_stats_update(stats, stats->last_event_start);
stats->last_event_start = 0;
} else {
stats->last_event_start = local_clock() ?: 1;
return true;
}
}
return false;
}
void bch2_time_stats_exit(struct bch2_time_stats *);
void bch2_time_stats_init(struct bch2_time_stats *);
#endif /* _BCACHEFS_TIME_STATS_H */

View File

@ -337,157 +337,16 @@ void bch2_prt_datetime(struct printbuf *out, time64_t sec)
}
#endif
static const struct time_unit {
const char *name;
u64 nsecs;
} time_units[] = {
{ "ns", 1 },
{ "us", NSEC_PER_USEC },
{ "ms", NSEC_PER_MSEC },
{ "s", NSEC_PER_SEC },
{ "m", (u64) NSEC_PER_SEC * 60},
{ "h", (u64) NSEC_PER_SEC * 3600},
{ "eon", U64_MAX },
};
static const struct time_unit *pick_time_units(u64 ns)
{
const struct time_unit *u;
for (u = time_units;
u + 1 < time_units + ARRAY_SIZE(time_units) &&
ns >= u[1].nsecs << 1;
u++)
;
return u;
}
void bch2_pr_time_units(struct printbuf *out, u64 ns)
{
const struct time_unit *u = pick_time_units(ns);
const struct time_unit *u = bch2_pick_time_units(ns);
prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name);
}
/* time stats: */
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
static void bch2_quantiles_update(struct bch2_quantiles *q, u64 v)
{
unsigned i = 0;
while (i < ARRAY_SIZE(q->entries)) {
struct bch2_quantile_entry *e = q->entries + i;
if (unlikely(!e->step)) {
e->m = v;
e->step = max_t(unsigned, v / 2, 1024);
} else if (e->m > v) {
e->m = e->m >= e->step
? e->m - e->step
: 0;
} else if (e->m < v) {
e->m = e->m + e->step > e->m
? e->m + e->step
: U32_MAX;
}
if ((e->m > v ? e->m - v : v - e->m) < e->step)
e->step = max_t(unsigned, e->step / 2, 1);
if (v >= e->m)
break;
i = eytzinger0_child(i, v > e->m);
}
}
static inline void bch2_time_stats_update_one(struct bch2_time_stats *stats,
u64 start, u64 end)
{
u64 duration, freq;
if (time_after64(end, start)) {
duration = end - start;
mean_and_variance_update(&stats->duration_stats, duration);
mean_and_variance_weighted_update(&stats->duration_stats_weighted, duration);
stats->max_duration = max(stats->max_duration, duration);
stats->min_duration = min(stats->min_duration, duration);
stats->total_duration += duration;
bch2_quantiles_update(&stats->quantiles, duration);
}
if (stats->last_event && time_after64(end, stats->last_event)) {
freq = end - stats->last_event;
mean_and_variance_update(&stats->freq_stats, freq);
mean_and_variance_weighted_update(&stats->freq_stats_weighted, freq);
stats->max_freq = max(stats->max_freq, freq);
stats->min_freq = min(stats->min_freq, freq);
}
stats->last_event = end;
}
static void __bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
struct bch2_time_stat_buffer *b)
{
for (struct bch2_time_stat_buffer_entry *i = b->entries;
i < b->entries + ARRAY_SIZE(b->entries);
i++)
bch2_time_stats_update_one(stats, i->start, i->end);
b->nr = 0;
}
static noinline void bch2_time_stats_clear_buffer(struct bch2_time_stats *stats,
struct bch2_time_stat_buffer *b)
{
unsigned long flags;
spin_lock_irqsave(&stats->lock, flags);
__bch2_time_stats_clear_buffer(stats, b);
spin_unlock_irqrestore(&stats->lock, flags);
}
void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end)
{
unsigned long flags;
WARN_ONCE(!stats->duration_stats_weighted.weight ||
!stats->freq_stats_weighted.weight,
"uninitialized time_stats");
if (!stats->buffer) {
spin_lock_irqsave(&stats->lock, flags);
bch2_time_stats_update_one(stats, start, end);
if (mean_and_variance_weighted_get_mean(stats->freq_stats_weighted) < 32 &&
stats->duration_stats.n > 1024)
stats->buffer =
alloc_percpu_gfp(struct bch2_time_stat_buffer,
GFP_ATOMIC);
spin_unlock_irqrestore(&stats->lock, flags);
} else {
struct bch2_time_stat_buffer *b;
preempt_disable();
b = this_cpu_ptr(stats->buffer);
BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
b->entries[b->nr++] = (struct bch2_time_stat_buffer_entry) {
.start = start,
.end = end
};
if (unlikely(b->nr == ARRAY_SIZE(b->entries)))
bch2_time_stats_clear_buffer(stats, b);
preempt_enable();
}
}
static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns)
{
const struct time_unit *u = pick_time_units(ns);
const struct time_unit *u = bch2_pick_time_units(ns);
prt_printf(out, "%llu ", div64_u64(ns, u->nsecs));
prt_tab_rjust(out);
@ -506,10 +365,8 @@ static inline void pr_name_and_units(struct printbuf *out, const char *name, u64
void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats)
{
const struct time_unit *u;
s64 f_mean = 0, d_mean = 0;
u64 q, last_q = 0, f_stddev = 0, d_stddev = 0;
int i;
u64 f_stddev = 0, d_stddev = 0;
if (stats->buffer) {
int cpu;
@ -608,39 +465,24 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
printbuf_tabstops_reset(out);
i = eytzinger0_first(NR_QUANTILES);
u = pick_time_units(stats->quantiles.entries[i].m);
if (stats->quantiles_enabled) {
int i = eytzinger0_first(NR_QUANTILES);
const struct time_unit *u =
bch2_pick_time_units(stats->quantiles.entries[i].m);
u64 last_q = 0;
prt_printf(out, "quantiles (%s):\t", u->name);
eytzinger0_for_each(i, NR_QUANTILES) {
bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
prt_printf(out, "quantiles (%s):\t", u->name);
eytzinger0_for_each(i, NR_QUANTILES) {
bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
q = max(stats->quantiles.entries[i].m, last_q);
prt_printf(out, "%llu ",
div_u64(q, u->nsecs));
if (is_last)
prt_newline(out);
last_q = q;
u64 q = max(stats->quantiles.entries[i].m, last_q);
prt_printf(out, "%llu ", div_u64(q, u->nsecs));
if (is_last)
prt_newline(out);
last_q = q;
}
}
}
#else
void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) {}
#endif
void bch2_time_stats_exit(struct bch2_time_stats *stats)
{
free_percpu(stats->buffer);
}
void bch2_time_stats_init(struct bch2_time_stats *stats)
{
memset(stats, 0, sizeof(*stats));
stats->duration_stats_weighted.weight = 8;
stats->freq_stats_weighted.weight = 8;
stats->min_duration = U64_MAX;
stats->min_freq = U64_MAX;
spin_lock_init(&stats->lock);
}
/* ratelimit: */

View File

@ -21,6 +21,7 @@
#include "mean_and_variance.h"
#include "darray.h"
#include "time_stats.h"
struct closure;
@ -329,84 +330,8 @@ static inline void prt_bdevname(struct printbuf *out, struct block_device *bdev)
#endif
}
#define NR_QUANTILES 15
#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES)
#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
#define QUANTILE_LAST eytzinger0_last(NR_QUANTILES)
struct bch2_quantiles {
struct bch2_quantile_entry {
u64 m;
u64 step;
} entries[NR_QUANTILES];
};
struct bch2_time_stat_buffer {
unsigned nr;
struct bch2_time_stat_buffer_entry {
u64 start;
u64 end;
} entries[32];
};
struct bch2_time_stats {
spinlock_t lock;
/* all fields are in nanoseconds */
u64 min_duration;
u64 max_duration;
u64 total_duration;
u64 max_freq;
u64 min_freq;
u64 last_event;
struct bch2_quantiles quantiles;
struct mean_and_variance duration_stats;
struct mean_and_variance_weighted duration_stats_weighted;
struct mean_and_variance freq_stats;
struct mean_and_variance_weighted freq_stats_weighted;
struct bch2_time_stat_buffer __percpu *buffer;
};
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
void __bch2_time_stats_update(struct bch2_time_stats *stats, u64, u64);
static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start)
{
__bch2_time_stats_update(stats, start, local_clock());
}
static inline bool track_event_change(struct bch2_time_stats *stats,
u64 *start, bool v)
{
if (v != !!*start) {
if (!v) {
bch2_time_stats_update(stats, *start);
*start = 0;
} else {
*start = local_clock() ?: 1;
return true;
}
}
return false;
}
#else
static inline void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) {}
static inline void bch2_time_stats_update(struct bch2_time_stats *stats, u64 start) {}
static inline bool track_event_change(struct bch2_time_stats *stats,
u64 *start, bool v)
{
bool ret = v && !*start;
*start = v;
return ret;
}
#endif
void bch2_time_stats_to_text(struct printbuf *, struct bch2_time_stats *);
void bch2_time_stats_exit(struct bch2_time_stats *);
void bch2_time_stats_init(struct bch2_time_stats *);
#define ewma_add(ewma, val, weight) \
({ \
typeof(ewma) _ewma = (ewma); \