diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index d2ef2bbd5..5f5b3d212 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -217,8 +217,15 @@ jobs: with: message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} message: | - - 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for ${{ matrix.model }} ${{ matrix.ftype }}: **${{ env.BENCH_ITERATIONS}} iterations** 🚀 +

+ + 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀 + +

+ +
+ + Expand details for performance related PR only - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }} - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }} @@ -226,9 +233,6 @@ jobs: - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s **total=${{ env.LLAMACPP_COMPLETION_TOKENS_TOTAL_COUNTER_RATE }}tk/s** - ${{ env.BENCH_GRAPH_XLABEL }} -
- - Time series

diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py index ea5d3854d..672fb57e8 100644 --- a/examples/server/bench/bench.py +++ b/examples/server/bench/bench.py @@ -176,6 +176,7 @@ xychart-beta # 140 chars max for commit status description bench_results = { + "i": iterations, "req": { "p90": data['metrics']["http_req_duration"]["p(90)"], "avg": data['metrics']["http_req_duration"]["avg"],