ci: bench: change to the 95 percentile for pp and tg as it is closer to what the server exports in metrics
parent 59dc4bbb99
commit b6b50b11f9
2 changed files with 6 additions and 6 deletions
.github/workflows/bench.yml (vendored): 6 changes
@@ -234,9 +234,9 @@ jobs:
 <summary>Expand details for performance related PR only</summary>

 - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
-- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(90)=${{ env.HTTP_REQ_DURATION_P_90_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
+- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
-- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_90_ }}tk/s
+- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
-- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(90)=${{ env.LLAMACPP_TOKENS_SECOND_P_90_ }}tk/s
+- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
 - ${{ env.BENCH_GRAPH_XLABEL }}

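The ${{ env.*_P_95_ }} placeholders above have to be set by the benchmark step before this comment template is rendered; the export code itself is not part of this diff. As a rough sketch of that plumbing, assuming the k6 summary has already been parsed into a dict and that the variable names are derived by upper-casing the metric and stat names (both of these are inferences from the template above, not taken from the actual bench scripts):

# Illustrative only: turn k6-style trend stats into GitHub Actions env vars such
# as HTTP_REQ_DURATION_P_95_ (the naming scheme is inferred from the template
# above; writing to GITHUB_ENV is standard GitHub Actions behaviour).
import os
import re


def export_github_env(metric: str, stat: str, value: float) -> None:
    # "http_req_duration" + "p(95)" -> "HTTP_REQ_DURATION_P_95_"
    name = re.sub(r"[^A-Z0-9]", "_", f"{metric}_{stat}".upper())
    env_file = os.environ.get("GITHUB_ENV", "github_env.txt")  # fallback for local runs
    with open(env_file, "a") as f:
        f.write(f"{name}={round(value, 2)}\n")


# Fake parsed k6 summary values, mirroring the keys used in the Python hunk below.
metrics = {"http_req_duration": {"avg": 123.45, "p(95)": 210.07}}
for stat, value in metrics["http_req_duration"].items():
    export_github_env("http_req_duration", stat, value)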
@@ -180,16 +180,16 @@ xychart-beta
     bench_results = {
         "i": iterations,
         "req": {
-            "p90": round(data['metrics']["http_req_duration"]["p(90)"], 2),
+            "p95": round(data['metrics']["http_req_duration"]["p(95)"], 2),
             "avg": round(data['metrics']["http_req_duration"]["avg"], 2),
         },
         "pp": {
-            "p90": round(data['metrics']["llamacpp_prompt_processing_second"]["p(90)"], 2),
+            "p95": round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2),
             "avg": round(data['metrics']["llamacpp_prompt_processing_second"]["avg"], 2),
             "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
         },
         "tg": {
-            "p90": round(data['metrics']["llamacpp_tokens_second"]["p(90)"], 2),
+            "p95": round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2),
             "avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
             "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
         },
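One practical difference between the two stats: p(95) reaches further into the slow tail of the request-duration trend than p(90). Below is a minimal, self-contained illustration with fake latencies and a nearest-rank percentile; k6's own interpolation method may differ slightly, so treat the numbers as a sketch rather than what the workflow reports.

# Illustrative only: fake request latencies (ms); two slow outliers sit between
# the 90th and 95th percentile, so p(95) reports them while p(90) does not.
import math


def percentile(values, q):
    """Nearest-rank percentile: q in (0, 1], e.g. 0.95 for p(95)."""
    ordered = sorted(values)
    return ordered[math.ceil(q * len(ordered)) - 1]


latencies_ms = [100, 102, 98, 101, 97, 103, 99, 100, 104, 96,
                101, 98, 102, 100, 99, 97, 103, 101, 480, 510]
print("p(90) =", percentile(latencies_ms, 0.90))  # 104: still in the ~100 ms cluster
print("p(95) =", percentile(latencies_ms, 0.95))  # 480: picks up the slow outliers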