server: bench: change gauge custom metrics to trend

server: bench: add trend custom metrics for total tokens per second average
This commit is contained in:
Pierrick HYMBERT 2024-03-09 09:15:15 +01:00
parent bed1cdda9a
commit 572758a665
2 changed files with 10 additions and 6 deletions

View file

@@ -64,13 +64,14 @@ SERVER_BENCH_N_PROMPTS=500 k6 run script.js --duration 10m --iterations 500 --vu
#### Metrics
Following metrics are available:
- `llamacpp_prompt_tokens` Gauge of OAI response `usage.prompt_tokens`
- `llamacpp_prompt_tokens_total_counter` Counter of OAI response `usage.prompt_tokens`
- `llamacpp_completion_tokens` Gauge of OAI response `usage.completion_tokens`
- `llamacpp_completion_tokens_total_counter` Counter of OAI response `usage.completion_tokens`
Following metrics are available computed from the OAI chat completions response `usage`:
- `llamacpp_tokens_second` Trend of `usage.total_tokens / request duration`
- `llamacpp_prompt_tokens` Trend of `usage.prompt_tokens`
- `llamacpp_prompt_tokens_total_counter` Counter of `usage.prompt_tokens`
- `llamacpp_completion_tokens` Trend of `usage.completion_tokens`
- `llamacpp_completion_tokens_total_counter` Counter of `usage.completion_tokens`
- `llamacpp_completions_truncated_rate` Rate of completions truncated, i.e. if `finish_reason === 'length'`
- `llamacpp_completions_stop_rate` Rate of completions truncated, i.e. if `finish_reason === 'stop'`
- `llamacpp_completions_stop_rate` Rate of completions stopped by the model, i.e. if `finish_reason === 'stop'`
The script will fail if too many completions are truncated, see `llamacpp_completions_truncated_rate`.

View file

@@ -34,6 +34,7 @@ const data = new SharedArray('conversations', function () {
const llamacpp_prompt_tokens = new Trend('llamacpp_prompt_tokens')
const llamacpp_completion_tokens = new Trend('llamacpp_completion_tokens')
const llamacpp_tokens_second = new Trend('llamacpp_tokens_second')
const llamacpp_prompt_tokens_total_counter = new Counter('llamacpp_prompt_tokens_total_counter')
const llamacpp_completion_tokens_total_counter = new Counter('llamacpp_completion_tokens_total_counter')
@@ -94,6 +95,8 @@ export default function () {
llamacpp_completions_truncated_rate.add(completions.choices[0].finish_reason === 'length')
llamacpp_completions_stop_rate.add(completions.choices[0].finish_reason === 'stop')
llamacpp_tokens_second.add(completions.usage.total_tokens / res.timings.duration * 1.e3)
} else {
console.error(`response: ${res.body}`)
}