From 572758a665e9435ec235ae6c788ed2e3f099d8cc Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Sat, 9 Mar 2024 09:15:15 +0100
Subject: [PATCH] server: bench: change gauge custom metrics to trend
 server: bench: add trend custom metrics for total tokens per second average

---
 examples/server/bench/README.md | 13 +++++++------
 examples/server/bench/script.js |  3 +++
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/examples/server/bench/README.md b/examples/server/bench/README.md
index 481dc5c4d..6e1709ee5 100644
--- a/examples/server/bench/README.md
+++ b/examples/server/bench/README.md
@@ -64,13 +64,14 @@ SERVER_BENCH_N_PROMPTS=500 k6 run script.js --duration 10m --iterations 500 --vu
 
 #### Metrics
 
-Following metrics are available:
-- `llamacpp_prompt_tokens` Gauge of OAI response `usage.prompt_tokens`
-- `llamacpp_prompt_tokens_total_counter` Counter of OAI response `usage.prompt_tokens`
-- `llamacpp_completion_tokens` Gauge of OAI response `usage.completion_tokens`
-- `llamacpp_completion_tokens_total_counter` Counter of OAI response `usage.completion_tokens`
+Following metrics are available computed from the OAI chat completions response `usage`:
+- `llamacpp_tokens_second` Trend of `usage.total_tokens / request duration`
+- `llamacpp_prompt_tokens` Trend of `usage.prompt_tokens`
+- `llamacpp_prompt_tokens_total_counter` Counter of `usage.prompt_tokens`
+- `llamacpp_completion_tokens` Trend of `usage.completion_tokens`
+- `llamacpp_completion_tokens_total_counter` Counter of `usage.completion_tokens`
 - `llamacpp_completions_truncated_rate` Rate of completions truncated, i.e. if `finish_reason === 'length'`
-- `llamacpp_completions_stop_rate` Rate of completions truncated, i.e. if `finish_reason === 'stop'`
+- `llamacpp_completions_stop_rate` Rate of completions stopped by the model, i.e. if `finish_reason === 'stop'`
 
 The script will fail if too many completions are truncated, see `llamacpp_completions_truncated_rate`.
 
diff --git a/examples/server/bench/script.js b/examples/server/bench/script.js
index d076b8c34..94b8aa94a 100644
--- a/examples/server/bench/script.js
+++ b/examples/server/bench/script.js
@@ -34,6 +34,7 @@ const data = new SharedArray('conversations', function () {
 
 const llamacpp_prompt_tokens = new Trend('llamacpp_prompt_tokens')
 const llamacpp_completion_tokens = new Trend('llamacpp_completion_tokens')
+const llamacpp_tokens_second = new Trend('llamacpp_tokens_second')
 
 const llamacpp_prompt_tokens_total_counter = new Counter('llamacpp_prompt_tokens_total_counter')
 const llamacpp_completion_tokens_total_counter = new Counter('llamacpp_completion_tokens_total_counter')
@@ -94,6 +95,8 @@ export default function () {
 
         llamacpp_completions_truncated_rate.add(completions.choices[0].finish_reason === 'length')
         llamacpp_completions_stop_rate.add(completions.choices[0].finish_reason === 'stop')
+
+        llamacpp_tokens_second.add(completions.usage.total_tokens / res.timings.duration * 1.e3)
     } else {
         console.error(`response: ${res.body}`)
     }
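
For reviewers less familiar with k6 custom metrics, below is a minimal, self-contained sketch (not part of the patch) of how the new `llamacpp_tokens_second` Trend is fed per request; the endpoint, model name and prompt are placeholders. k6 reports `res.timings.duration` in milliseconds, hence the `* 1.e3` factor to convert `usage.total_tokens` per millisecond into tokens per second.

    import http from 'k6/http'
    import { Trend } from 'k6/metrics'

    // A Trend records a distribution (avg/min/med/max/p(90)/p(95)) of all samples,
    // unlike a Gauge, which only keeps the last value seen.
    const llamacpp_tokens_second = new Trend('llamacpp_tokens_second')

    export default function () {
        // Placeholder endpoint and payload, for illustration only.
        const payload = JSON.stringify({
            messages: [{role: 'user', content: 'Say hello.'}],
            model: 'model',
            max_tokens: 64,
        })
        const res = http.post('http://localhost:8080/v1/chat/completions', payload,
            {headers: {'Content-Type': 'application/json'}})

        if (res.status === 200) {
            const completions = JSON.parse(res.body)
            // res.timings.duration is the request duration in ms, so scale to seconds.
            llamacpp_tokens_second.add(completions.usage.total_tokens / res.timings.duration * 1.e3)
        }
    }

Switching these metrics from Gauge to Trend is what makes an average tokens-per-second figure appear in the k6 end-of-test summary, since a Trend aggregates every sample instead of reporting only the most recent one.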