ci: bench: add mermaid charts as a fallback in case the images cannot be uploaded
This commit is contained in:
parent
5c0b2a2b59
commit
93434fdc7e
2 changed files with 128 additions and 35 deletions
114
.github/workflows/bench.yml
vendored
114
.github/workflows/bench.yml
vendored
|
@ -117,6 +117,18 @@ jobs:
|
||||||
|
|
||||||
cat results.github.env >> $GITHUB_ENV
|
cat results.github.env >> $GITHUB_ENV
|
||||||
|
|
||||||
|
# Remove dataset as we do not want it in the artefact
|
||||||
|
rm ShareGPT_V3_unfiltered_cleaned_split.json
|
||||||
|
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: benchmark-results
|
||||||
|
compression-level: 9
|
||||||
|
path: |
|
||||||
|
examples/server/bench/*.png
|
||||||
|
examples/server/bench/*.json
|
||||||
|
examples/server/bench/*.log
|
||||||
|
|
||||||
- name: Commit status
|
- name: Commit status
|
||||||
uses: Sibz/github-status-action@v1
|
uses: Sibz/github-status-action@v1
|
||||||
with:
|
with:
|
||||||
|
@ -128,6 +140,7 @@ jobs:
|
||||||
|
|
||||||
- name: Upload benchmark images
|
- name: Upload benchmark images
|
||||||
uses: devicons/public-upload-to-imgur@v2.2.2
|
uses: devicons/public-upload-to-imgur@v2.2.2
|
||||||
|
continue-on-error: true # Important as it looks unstable: 503
|
||||||
id: imgur_step
|
id: imgur_step
|
||||||
with:
|
with:
|
||||||
client_id: ${{secrets.IMGUR_CLIENT_ID}}
|
client_id: ${{secrets.IMGUR_CLIENT_ID}}
|
||||||
|
@ -136,44 +149,95 @@ jobs:
|
||||||
examples/server/bench/predicted_tokens_seconds.png
|
examples/server/bench/predicted_tokens_seconds.png
|
||||||
examples/server/bench/kv_cache_usage_ratio.png
|
examples/server/bench/kv_cache_usage_ratio.png
|
||||||
examples/server/bench/requests_processing.png
|
examples/server/bench/requests_processing.png
|
||||||
examples/server/bench/requests_deferred.png
|
|
||||||
|
- name: Extract mermaid
|
||||||
|
id: set_mermaid
|
||||||
|
run: |
|
||||||
|
set -eux
|
||||||
|
|
||||||
|
cd examples/server/bench
|
||||||
|
PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
|
||||||
|
echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
|
||||||
|
echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
|
||||||
|
echo "EOF" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
|
||||||
|
echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
|
||||||
|
echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
|
||||||
|
echo "EOF" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
|
||||||
|
echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
|
||||||
|
echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
|
||||||
|
echo "EOF" >> $GITHUB_ENV
|
||||||
|
|
||||||
|
REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
|
||||||
|
echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
|
||||||
|
echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
|
||||||
|
echo "EOF" >> $GITHUB_ENV
|
||||||
|
|
||||||
- name: Comment PR
|
- name: Comment PR
|
||||||
uses: mshick/add-pr-comment@v2
|
uses: mshick/add-pr-comment@v2
|
||||||
id: comment_pr
|
id: comment_pr
|
||||||
if: ${{ github.event.pull_request != '' }}
|
if: ${{ github.event.pull_request != '' }}
|
||||||
|
continue-on-error: true
|
||||||
with:
|
with:
|
||||||
message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
|
message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
|
||||||
message: |
|
message: |
|
||||||
📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
|
📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
|
||||||
|
|
||||||
|
- ${{ env.BENCH_GRAPH_XLABEL }}
|
||||||
|
- req_avg=${{ env.HTTP_REQ_DURATION_AVG }} pp_avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }} tks_avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}
|
||||||
|
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
|
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
|
||||||
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
|
|
||||||
|
<details>
|
||||||
|
<summary>More</summary>
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
${{ env.PROMPT_TOKENS_SECONDS }}
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>More</summary>
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
${{ env.PREDICTED_TOKENS_SECONDS }}
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
</p>
|
</p>
|
||||||
<details>
|
<details>
|
||||||
<summary>Details</summary>
|
<summary>Details</summary>
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
|
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
|
||||||
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
|
|
||||||
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[4] }}" alt="requests_deferred"/>
|
|
||||||
</p>
|
|
||||||
</detail>
|
|
||||||
|
|
||||||
- name: Upload results
|
<details>
|
||||||
if: ${{ github.event.pull_request }}
|
<summary>More</summary>
|
||||||
uses: edunad/actions-image@v2.0.0
|
|
||||||
with:
|
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
path: 'examples/server/bench/*.png'
|
|
||||||
title: |
|
|
||||||
llama.cpp server benchmark results for ${{ github.job }} on ${{ env.RUNNER_LABEL }}: ${{ env.LLAMACPP_TOKENS_SECOND_AVG}}tk/s
|
|
||||||
annotationLevel: 'success'
|
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v4
|
```mermaid
|
||||||
with:
|
${{ env.KV_CACHE_USAGE_RATIO }}
|
||||||
name: benchmark-results
|
```
|
||||||
compression-level: 9
|
|
||||||
path: |
|
</details>
|
||||||
examples/server/bench/*.png
|
|
||||||
examples/server/bench/*.json
|
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
|
||||||
examples/server/bench/*.log
|
|
||||||
|
<details>
|
||||||
|
<summary>More</summary>
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
${{ env.REQUESTS_PROCESSING }}
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
</p>
|
||||||
|
</details>
|
||||||
|
|
|
@ -101,6 +101,12 @@ def main(args_in: list[str] | None = None) -> None:
|
||||||
while is_server_listening(args.host, args.port):
|
while is_server_listening(args.host, args.port):
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
title = (f"llama.cpp {args.name} on {args.runner_label}\n "
|
||||||
|
f"duration={args.duration} {iterations} iterations")
|
||||||
|
xlabel = (f"{args.hf_repo}/{args.hf_file}\n"
|
||||||
|
f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size} pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
|
||||||
|
f"branch={args.branch} commit={args.commit}")
|
||||||
|
|
||||||
# Prometheus
|
# Prometheus
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
if is_server_listening("0.0.0.0", 9090):
|
if is_server_listening("0.0.0.0", 9090):
|
||||||
|
@ -121,23 +127,20 @@ def main(args_in: list[str] | None = None) -> None:
|
||||||
values = metric_data['data']['result'][0]['values']
|
values = metric_data['data']['result'][0]['values']
|
||||||
timestamps, metric_values = zip(*values)
|
timestamps, metric_values = zip(*values)
|
||||||
metric_values = [float(value) for value in metric_values]
|
metric_values = [float(value) for value in metric_values]
|
||||||
timestamps = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
|
timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
|
||||||
plt.figure(figsize=(16, 10), dpi=80)
|
plt.figure(figsize=(16, 10), dpi=80)
|
||||||
plt.plot(timestamps, metric_values, label=metric)
|
plt.plot(timestamps_dt, metric_values, label=metric)
|
||||||
plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
|
plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
|
||||||
plt.yticks(fontsize=12, alpha=.7)
|
plt.yticks(fontsize=12, alpha=.7)
|
||||||
|
|
||||||
plt.title(f"llama.cpp {args.name} on {args.runner_label}\n"
|
ylabel = f"llamacpp:{metric}"
|
||||||
f"duration={args.duration} {iterations} iterations",
|
plt.title(title,
|
||||||
fontsize=14, wrap=True)
|
fontsize=14, wrap=True)
|
||||||
plt.grid(axis='both', alpha=.3)
|
plt.grid(axis='both', alpha=.3)
|
||||||
plt.ylabel(f"llamacpp:{metric}", fontsize=22)
|
plt.ylabel(ylabel, fontsize=22)
|
||||||
plt.xlabel(f"{args.hf_repo}/{args.hf_file}\n"
|
plt.xlabel(xlabel, fontsize=14, wrap=True)
|
||||||
f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
|
|
||||||
f"pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
|
|
||||||
f"branch={args.branch} commit={args.commit}", fontsize=14, wrap=True)
|
|
||||||
plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
|
plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
|
||||||
plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S"))
|
plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y-%m-%d %H:%M:%S"))
|
||||||
plt.gcf().autofmt_xdate()
|
plt.gcf().autofmt_xdate()
|
||||||
|
|
||||||
# Remove borders
|
# Remove borders
|
||||||
|
@ -150,6 +153,27 @@ def main(args_in: list[str] | None = None) -> None:
|
||||||
plt.savefig(f'{metric}.png')
|
plt.savefig(f'{metric}.png')
|
||||||
plt.close()
|
plt.close()
|
||||||
|
|
||||||
|
# Mermaid format, used as a fallback in case the image upload fails
|
||||||
|
with (open(f"{metric}.mermaid", 'w') as mermaid_f):
|
||||||
|
mermaid = (
|
||||||
|
f"""---
|
||||||
|
config:
|
||||||
|
xyChart:
|
||||||
|
titleFontSize: 12
|
||||||
|
width: 900
|
||||||
|
height: 600
|
||||||
|
themeVariables:
|
||||||
|
xyChart:
|
||||||
|
titleColor: "#000000"
|
||||||
|
---
|
||||||
|
xychart-beta
|
||||||
|
title "{title}"
|
||||||
|
y-axis "llamacpp:{metric}"
|
||||||
|
x-axis "llamacpp:{metric}" {int(min(timestamps))} --> {int(max(timestamps))}
|
||||||
|
line [{', '.join([str(round(float(value))) for value in metric_values])}]
|
||||||
|
""")
|
||||||
|
mermaid_f.write(mermaid)
|
||||||
|
|
||||||
# 140 chars max for commit status description
|
# 140 chars max for commit status description
|
||||||
bench_results = {
|
bench_results = {
|
||||||
"req": {
|
"req": {
|
||||||
|
@ -169,6 +193,11 @@ def main(args_in: list[str] | None = None) -> None:
|
||||||
github_env.write(f"BENCH_RESULTS={json.dumps(bench_results, indent=None, separators=(',', ':') )}\n")
|
github_env.write(f"BENCH_RESULTS={json.dumps(bench_results, indent=None, separators=(',', ':') )}\n")
|
||||||
github_env.write(f"BENCH_ITERATIONS={iterations}\n")
|
github_env.write(f"BENCH_ITERATIONS={iterations}\n")
|
||||||
|
|
||||||
|
title = title.replace('\n', ' ')
|
||||||
|
xlabel = xlabel.replace('\n', ' ')
|
||||||
|
github_env.write(f"BENCH_GRAPH_TITLE={title}\n")
|
||||||
|
github_env.write(f"BENCH_GRAPH_XLABEL={xlabel}\n")
|
||||||
|
|
||||||
|
|
||||||
def start_benchmark(args):
|
def start_benchmark(args):
|
||||||
k6_path = 'k6'
|
k6_path = 'k6'
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue