ci: bench: add per slot metric in the commit status
parent 04e1ce3498
commit 64c7534b00

1 changed file with 11 additions and 6 deletions
@@ -16,6 +16,7 @@ import matplotlib
 import matplotlib.dates
 import matplotlib.pyplot as plt
 import requests
+from statistics import mean


 def main(args_in: list[str] | None = None) -> None:
@@ -109,6 +110,7 @@ def main(args_in: list[str] | None = None) -> None:

     # Prometheus
     end_time = time.time()
+    prometheus_metrics = {}
     if is_server_listening("0.0.0.0", 9090):
         metrics = ['prompt_tokens_seconds', 'predicted_tokens_seconds',
                    'kv_cache_usage_ratio', 'requests_processing', 'requests_deferred']
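Note: the metric_data parsed in the next hunk comes from Prometheus' range-query HTTP API (/api/v1/query_range) on the server checked above. A minimal sketch of such a request; the metric name prefix, time window, and step are illustrative assumptions, not necessarily what bench.py passes:

import time
import requests

end_time = time.time()
start_time = end_time - 600  # assumed: start of the benchmark window
metric = 'prompt_tokens_seconds'

metric_data = requests.get(
    'http://localhost:9090/api/v1/query_range',
    params={
        'query': f'llamacpp:{metric}',  # assumed exporter prefix
        'start': start_time,
        'end': end_time,
        'step': 2,
    },
).json()
# Expected shape: {'data': {'result': [{'values': [[<unix_ts>, '<value>'], ...]}]}}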
@@ -127,6 +129,7 @@ def main(args_in: list[str] | None = None) -> None:
             values = metric_data['data']['result'][0]['values']
             timestamps, metric_values = zip(*values)
             metric_values = [float(value) for value in metric_values]
+            prometheus_metrics[metric] = metric_values
             timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
             plt.figure(figsize=(16, 10), dpi=80)
             plt.plot(timestamps_dt, metric_values, label=metric)
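For illustration, this is the shape the parsing above expects and what ends up in prometheus_metrics as a standalone snippet; the payload below is invented, only the structure mirrors a Prometheus range-query response:

from datetime import datetime

metric_data = {
    'data': {
        'result': [
            {'values': [[1700000000, '12.5'], [1700000002, '13.1'], [1700000004, '12.8']]},
        ]
    }
}

values = metric_data['data']['result'][0]['values']
timestamps, metric_values = zip(*values)                   # split [ts, value] pairs
metric_values = [float(value) for value in metric_values]  # Prometheus returns values as strings
timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]

print(metric_values)     # [12.5, 13.1, 12.8]
print(timestamps_dt[0])  # first sample timestamp as a local datetime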
@@ -178,16 +181,18 @@ xychart-beta
     bench_results = {
         "i": iterations,
         "req": {
-            "p90": data['metrics']["http_req_duration"]["p(90)"],
-            "avg": data['metrics']["http_req_duration"]["avg"],
+            "p90": round(data['metrics']["http_req_duration"]["p(90)"], 2),
+            "avg": round(data['metrics']["http_req_duration"]["avg"], 2),
         },
         "pp": {
-            "p90": data['metrics']["llamacpp_prompt_tokens"]["p(90)"],
-            "avg": data['metrics']["llamacpp_prompt_tokens"]["avg"],
+            "p90": round(data['metrics']["llamacpp_prompt_tokens"]["p(90)"], 2),
+            "avg": round(data['metrics']["llamacpp_prompt_tokens"]["avg"], 2),
+            "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
         },
         "tg": {
-            "p90": data['metrics']["llamacpp_tokens_second"]["p(90)"],
-            "avg": data['metrics']["llamacpp_tokens_second"]["avg"],
+            "p90": round(data['metrics']["llamacpp_tokens_second"]["p(90)"], 2),
+            "avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
+            "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
         },
     }
     with open("results.github.env", 'a') as github_env:
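The new "0" entries (the per-slot metric this commit adds) travel with the rest of bench_results into results.github.env, which later workflow steps read when building the commit status. A sketch of one way that file can be filled in; the BENCH_RESULTS variable name and the single-line JSON encoding are assumptions, not necessarily what bench.py writes:

import json

# Abbreviated bench_results with illustrative values only.
bench_results = {
    "i": 42,
    "pp": {"p90": 251.33, "avg": 212.42, "0": 198.75},
    "tg": {"p90": 19.51, "avg": 18.02, "0": 17.64},
}

with open("results.github.env", 'a') as github_env:
    # Hypothetical variable name; GitHub Actions consumes NAME=VALUE lines from env files.
    github_env.write(f"BENCH_RESULTS={json.dumps(bench_results, separators=(',', ':'))}\n")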