From 64c7534b00df754a31ff1ec7906716418642d9d4 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Wed, 3 Apr 2024 21:33:41 +0200 Subject: [PATCH] ci: bench: add per slot metric in the commit status --- examples/server/bench/bench.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py index 672fb57e8..86eeeccf8 100644 --- a/examples/server/bench/bench.py +++ b/examples/server/bench/bench.py @@ -16,6 +16,7 @@ import matplotlib import matplotlib.dates import matplotlib.pyplot as plt import requests +from statistics import mean def main(args_in: list[str] | None = None) -> None: @@ -109,6 +110,7 @@ def main(args_in: list[str] | None = None) -> None: # Prometheus end_time = time.time() + prometheus_metrics = {} if is_server_listening("0.0.0.0", 9090): metrics = ['prompt_tokens_seconds', 'predicted_tokens_seconds', 'kv_cache_usage_ratio', 'requests_processing', 'requests_deferred'] @@ -127,6 +129,7 @@ def main(args_in: list[str] | None = None) -> None: values = metric_data['data']['result'][0]['values'] timestamps, metric_values = zip(*values) metric_values = [float(value) for value in metric_values] + prometheus_metrics[metric] = metric_values timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps] plt.figure(figsize=(16, 10), dpi=80) plt.plot(timestamps_dt, metric_values, label=metric) @@ -178,16 +181,18 @@ xychart-beta bench_results = { "i": iterations, "req": { - "p90": data['metrics']["http_req_duration"]["p(90)"], - "avg": data['metrics']["http_req_duration"]["avg"], + "p90": round(data['metrics']["http_req_duration"]["p(90)"], 2), + "avg": round(data['metrics']["http_req_duration"]["avg"], 2), }, "pp": { - "p90": data['metrics']["llamacpp_prompt_tokens"]["p(90)"], - "avg": data['metrics']["llamacpp_prompt_tokens"]["avg"], + "p90": round(data['metrics']["llamacpp_prompt_tokens"]["p(90)"], 2), + "avg": round(data['metrics']["llamacpp_prompt_tokens"]["avg"], 2), + "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2), }, "tg": { - "p90": data['metrics']["llamacpp_tokens_second"]["p(90)"], - "avg": data['metrics']["llamacpp_tokens_second"]["avg"], + "p90": round(data['metrics']["llamacpp_tokens_second"]["p(90)"], 2), + "avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2), + "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2), }, } with open("results.github.env", 'a') as github_env: