ci: bench: add mermaid in case the image cannot be uploaded

2024-03-26 01:08:59 +01:00 · 2024-03-26 01:08:59 +01:00 · 93434fdc7e
commit 93434fdc7e
parent 5c0b2a2b59
2 changed files with 128 additions and 35 deletions
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@ -117,6 +117,18 @@ jobs:
          cat results.github.env >> $GITHUB_ENV
          # Remove dataset as we do not want it in the artefact
          rm ShareGPT_V3_unfiltered_cleaned_split.json
      # Keep the benchmark outputs (plots, JSON results and logs) as a workflow
      # artifact named "benchmark-results".
      - uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          compression-level: 9  # highest level accepted by upload-artifact (0-9)
          path: |
            examples/server/bench/*.png
            examples/server/bench/*.json
            examples/server/bench/*.log
      - name: Commit status
        uses: Sibz/github-status-action@v1
        with:
@ -128,6 +140,7 @@ jobs:
      - name: Upload benchmark images
        uses: devicons/public-upload-to-imgur@v2.2.2
        continue-on-error: true # Important as it looks unstable: 503
        id: imgur_step
        with:
          client_id: ${{secrets.IMGUR_CLIENT_ID}}
@ -136,44 +149,95 @@ jobs:
            examples/server/bench/predicted_tokens_seconds.png
            examples/server/bench/kv_cache_usage_ratio.png
            examples/server/bench/requests_processing.png
-            examples/server/bench/requests_deferred.png
+
      # Export every <metric>.mermaid chart found in examples/server/bench as a
      # multiline environment variable named after the metric in upper case
      # (e.g. PROMPT_TOKENS_SECONDS), for use by later steps via `env.<NAME>`.
      - name: Extract mermaid
        id: set_mermaid
        run: |
          set -eux
          cd examples/server/bench
          for metric in prompt_tokens_seconds predicted_tokens_seconds kv_cache_usage_ratio requests_processing; do
            var_name=$(printf '%s' "$metric" | tr '[:lower:]' '[:upper:]')
            # Command substitution drops trailing newlines, so the closing
            # delimiter below always starts on a line of its own.
            chart=$(cat "${metric}.mermaid")
            {
              echo "${var_name}<<EOF"
              echo "$chart"
              echo "EOF"
            } >> "$GITHUB_ENV"
          done
      - name: Comment PR
        uses: mshick/add-pr-comment@v2
        id: comment_pr
        if: ${{ github.event.pull_request != '' }}
        continue-on-error: true
        with:
          message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
          message: |
            📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
            - ${{ env.BENCH_GRAPH_XLABEL }}
            - req_avg=${{ env.HTTP_REQ_DURATION_AVG }} pp_avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }} tks_avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}
            <p align="center">
-                <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
+            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
-                <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
+
            <details>
                <summary>More</summary>
            ```mermaid
            ${{ env.PROMPT_TOKENS_SECONDS }}
            ```
            </details>
            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
            <details>
                <summary>More</summary>
            ```mermaid
            ${{ env.PREDICTED_TOKENS_SECONDS }}
            ```
            </details>
            </p>
            <details>
                <summary>Details</summary>
                <p align="center">
-                    <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
+            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
                    <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
                    <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[4] }}" alt="requests_deferred"/>
                </p>
            </details>
-      - name: Upload results
+            <details>
-        if: ${{ github.event.pull_request }}
+                <summary>More</summary>
        uses: edunad/actions-image@v2.0.0
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          path: 'examples/server/bench/*.png'
          title: |
            llama.cpp server benchmark results for ${{ github.job }} on ${{ env.RUNNER_LABEL }}: ${{ env.LLAMACPP_TOKENS_SECOND_AVG}}tk/s
          annotationLevel: 'success'
-      - uses: actions/upload-artifact@v4
+            ```mermaid
-        with:
+            ${{ env.KV_CACHE_USAGE_RATIO }}
-          name: benchmark-results
+            ```
-          compression-level: 9
+
-          path: |
+            </details>
-            examples/server/bench/*.png
+
-            examples/server/bench/*.json
+            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
-            examples/server/bench/*.log
+
            <details>
                <summary>More</summary>
            ```mermaid
            ${{ env.REQUESTS_PROCESSING }}
            ```
            </details>
            </p>
            </details>
--- a/examples/server/bench/bench.py
+++ b/examples/server/bench/bench.py
@ -101,6 +101,12 @@ def main(args_in: list[str] | None = None) -> None:
        while is_server_listening(args.host, args.port):
            time.sleep(0.1)
    title = (f"llama.cpp {args.name} on {args.runner_label}\n "
             f"duration={args.duration} {iterations} iterations")
    xlabel = (f"{args.hf_repo}/{args.hf_file}\n"
              f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size} pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
              f"branch={args.branch} commit={args.commit}")
    # Prometheus
    end_time = time.time()
    if is_server_listening("0.0.0.0", 9090):
@ -121,23 +127,20 @@ def main(args_in: list[str] | None = None) -> None:
                values = metric_data['data']['result'][0]['values']
                timestamps, metric_values = zip(*values)
                metric_values = [float(value) for value in metric_values]
-                timestamps = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
+                timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
                plt.figure(figsize=(16, 10), dpi=80)
-                plt.plot(timestamps, metric_values, label=metric)
+                plt.plot(timestamps_dt, metric_values, label=metric)
                plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
                plt.yticks(fontsize=12, alpha=.7)
-                plt.title(f"llama.cpp {args.name} on {args.runner_label}\n"
+                ylabel = f"llamacpp:{metric}"
-                          f"duration={args.duration} {iterations} iterations",
+                plt.title(title,
                          fontsize=14, wrap=True)
                plt.grid(axis='both', alpha=.3)
-                plt.ylabel(f"llamacpp:{metric}", fontsize=22)
+                plt.ylabel(ylabel, fontsize=22)
-                plt.xlabel(f"{args.hf_repo}/{args.hf_file}\n"
+                plt.xlabel(xlabel, fontsize=14, wrap=True)
                           f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
                           f"pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
                           f"branch={args.branch} commit={args.commit}", fontsize=14, wrap=True)
                plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
-                plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S"))
+                plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y-%m-%d %H:%M:%S"))
                plt.gcf().autofmt_xdate()
                # Remove borders
@ -150,6 +153,27 @@ def main(args_in: list[str] | None = None) -> None:
                plt.savefig(f'{metric}.png')
                plt.close()
                # Mermaid format in case image failed
                with (open(f"{metric}.mermaid", 'w') as mermaid_f):
                    mermaid = (
                    f"""---
 config:
    xyChart:
        titleFontSize: 12
        width: 900
        height: 600
    themeVariables:
        xyChart:
            titleColor: "#000000"
 ---
 xychart-beta
    title "{title}"
    y-axis "llamacpp:{metric}"
    x-axis "llamacpp:{metric}" {int(min(timestamps))} --> {int(max(timestamps))}
    line [{', '.join([str(round(float(value))) for value in metric_values])}]
                    """)
                    mermaid_f.write(mermaid)
    # 140 chars max for commit status description
    bench_results = {
        "req": {
@ -169,6 +193,11 @@ def main(args_in: list[str] | None = None) -> None:
        github_env.write(f"BENCH_RESULTS={json.dumps(bench_results, indent=None, separators=(',', ':') )}\n")
        github_env.write(f"BENCH_ITERATIONS={iterations}\n")
        title = title.replace('\n', ' ')
        xlabel = xlabel.replace('\n', ' ')
        github_env.write(f"BENCH_GRAPH_TITLE={title}\n")
        github_env.write(f"BENCH_GRAPH_XLABEL={xlabel}\n")
 def start_benchmark(args):
    k6_path = 'k6'