server: bench: fix graph, fix output artifact

This commit is contained in:
Pierrick HYMBERT 2024-03-25 21:44:45 +01:00
parent 799317b27d
commit 5c0b2a2b59
2 changed files with 11 additions and 4 deletions

View file

@@ -174,6 +174,6 @@ jobs:
name: benchmark-results name: benchmark-results
compression-level: 9 compression-level: 9
path: | path: |
examples/server/bench/**/.png examples/server/bench/*.png
examples/server/bench/**/.json examples/server/bench/*.json
examples/server/bench/**/.log examples/server/bench/*.log

View file

@@ -1,5 +1,4 @@
import argparse import argparse
import base64
import json import json
import os import os
import re import re
@@ -13,6 +12,8 @@ import traceback
from contextlib import closing from contextlib import closing
from datetime import datetime from datetime import datetime
import matplotlib
import matplotlib.dates
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import requests import requests
@@ -109,6 +110,10 @@ def main(args_in: list[str] | None = None) -> None:
for metric in metrics: for metric in metrics:
resp = requests.get(f"http://localhost:9090/api/v1/query_range", resp = requests.get(f"http://localhost:9090/api/v1/query_range",
params={'query': 'llamacpp:' + metric, 'start': start_time, 'end': end_time, 'step': 2}) params={'query': 'llamacpp:' + metric, 'start': start_time, 'end': end_time, 'step': 2})
with open(f"{metric}.json", 'w') as metric_json:
metric_json.write(resp.text)
if resp.status_code != 200: if resp.status_code != 200:
print(f"bench: unable to extract prometheus metric {metric}: {resp.text}") print(f"bench: unable to extract prometheus metric {metric}: {resp.text}")
else: else:
@@ -131,6 +136,8 @@ def main(args_in: list[str] | None = None) -> None:
f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n" f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
f"pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n" f"pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
f"branch={args.branch} commit={args.commit}", fontsize=14, wrap=True) f"branch={args.branch} commit={args.commit}", fontsize=14, wrap=True)
plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S"))
plt.gcf().autofmt_xdate() plt.gcf().autofmt_xdate()
# Remove borders # Remove borders