From b6b50b11f9e9a1c82aea1177f3a5d61980359cc8 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Fri, 5 Apr 2024 01:30:24 +0200 Subject: [PATCH] ci: bench: change to the 95 percentile for pp and tg as it is closer to what the server exports in metrics --- .github/workflows/bench.yml | 6 +++--- examples/server/bench/bench.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 70bfb7f68..758796632 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -234,9 +234,9 @@ jobs: Expand details for performance related PR only - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }} - - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }} - - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_90_ }}tk/s - - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s + - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }} + - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s + - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s - ${{ env.BENCH_GRAPH_XLABEL }} diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py index 6ad4b89ba..6ca637bdd 100644 --- a/examples/server/bench/bench.py +++ b/examples/server/bench/bench.py @@ -180,16 +180,16 @@ xychart-beta bench_results = { "i": iterations, "req": { - "p90": round(data['metrics']["http_req_duration"]["p(90)"], 2), + "p95": round(data['metrics']["http_req_duration"]["p(95)"], 2), "avg": round(data['metrics']["http_req_duration"]["avg"], 2), }, "pp": { - "p90": round(data['metrics']["llamacpp_prompt_processing_second"]["p(90)"], 2), + "p95": round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2), "avg": round(data['metrics']["llamacpp_prompt_processing_second"]["avg"], 2), "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2), }, "tg": { - "p90": round(data['metrics']["llamacpp_tokens_second"]["p(90)"], 2), + "p95": round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2), "avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2), "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2), },