server: bench: change gauge custom metrics to trend
server: bench: add trend custom metrics for total tokens per second average
This commit is contained in:
parent
bed1cdda9a
commit
572758a665
2 changed files with 10 additions and 6 deletions
|
@ -64,13 +64,14 @@ SERVER_BENCH_N_PROMPTS=500 k6 run script.js --duration 10m --iterations 500 --vu
|
|||
|
||||
#### Metrics
|
||||
|
||||
The following metrics are available:
|
||||
- `llamacpp_prompt_tokens` Gauge of OAI response `usage.prompt_tokens`
|
||||
- `llamacpp_prompt_tokens_total_counter` Counter of OAI response `usage.prompt_tokens`
|
||||
- `llamacpp_completion_tokens` Gauge of OAI response `usage.completion_tokens`
|
||||
- `llamacpp_completion_tokens_total_counter` Counter of OAI response `usage.completion_tokens`
|
||||
The following metrics are computed from the OAI chat completions response `usage`:
|
||||
- `llamacpp_tokens_second` Trend of `usage.total_tokens / request duration`
|
||||
- `llamacpp_prompt_tokens` Trend of `usage.prompt_tokens`
|
||||
- `llamacpp_prompt_tokens_total_counter` Counter of `usage.prompt_tokens`
|
||||
- `llamacpp_completion_tokens` Trend of `usage.completion_tokens`
|
||||
- `llamacpp_completion_tokens_total_counter` Counter of `usage.completion_tokens`
|
||||
- `llamacpp_completions_truncated_rate` Rate of completions truncated, i.e. if `finish_reason === 'length'`
|
||||
- `llamacpp_completions_stop_rate` Rate of completions truncated, i.e. if `finish_reason === 'stop'`
|
||||
- `llamacpp_completions_stop_rate` Rate of completions stopped by the model, i.e. if `finish_reason === 'stop'`
|
||||
|
||||
The script will fail if too many completions are truncated, see `llamacpp_completions_truncated_rate`.
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@ const data = new SharedArray('conversations', function () {
|
|||
|
||||
const llamacpp_prompt_tokens = new Trend('llamacpp_prompt_tokens')
|
||||
const llamacpp_completion_tokens = new Trend('llamacpp_completion_tokens')
|
||||
const llamacpp_tokens_second = new Trend('llamacpp_tokens_second')
|
||||
|
||||
const llamacpp_prompt_tokens_total_counter = new Counter('llamacpp_prompt_tokens_total_counter')
|
||||
const llamacpp_completion_tokens_total_counter = new Counter('llamacpp_completion_tokens_total_counter')
|
||||
|
@ -94,6 +95,8 @@ export default function () {
|
|||
|
||||
llamacpp_completions_truncated_rate.add(completions.choices[0].finish_reason === 'length')
|
||||
llamacpp_completions_stop_rate.add(completions.choices[0].finish_reason === 'stop')
|
||||
|
||||
llamacpp_tokens_second.add(completions.usage.total_tokens / res.timings.duration * 1.e3)
|
||||
} else {
|
||||
console.error(`response: ${res.body}`)
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue