From e9a3ba643b46de1571973e9de47ef4f00f96ee13 Mon Sep 17 00:00:00 2001
From: slaren
Date: Fri, 10 May 2024 15:33:11 +0200
Subject: [PATCH] update llama-bench readme

---
 examples/llama-bench/README.md | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/examples/llama-bench/README.md b/examples/llama-bench/README.md
index 10f37b441..857840564 100644
--- a/examples/llama-bench/README.md
+++ b/examples/llama-bench/README.md
@@ -26,16 +26,21 @@ options:
   -m, --model <filename>              (default: models/7B/ggml-model-q4_0.gguf)
   -p, --n-prompt <n>                  (default: 512)
   -n, --n-gen <n>                     (default: 128)
-  -b, --batch-size <n>                (default: 512)
-  -ctk <t>, --cache-type-k <t>        (default: f16)
-  -ctv <t>, --cache-type-v <t>        (default: f16)
-  -t, --threads <n>                   (default: 112)
+  -pg <pp,tg>                         (default: 512,128)
+  -b, --batch-size <n>                (default: 2048)
+  -ub, --ubatch-size <n>              (default: 512)
+  -ctk, --cache-type-k <t>            (default: f16)
+  -ctv, --cache-type-v <t>            (default: f16)
+  -t, --threads <n>                   (default: 16)
   -ngl, --n-gpu-layers <n>            (default: 99)
   -sm, --split-mode <none|layer|row>  (default: layer)
   -mg, --main-gpu <i>                 (default: 0)
   -nkvo, --no-kv-offload <0|1>        (default: 0)
+  -fa, --flash-attn <0|1>             (default: 0)
   -mmp, --mmap <0|1>                  (default: 1)
-  -ts, --tensor_split <ts0/ts1/..>    (default: 0)
+  --numa <distribute|isolate|numactl> (default: disabled)
+  -embd, --embeddings <0|1>           (default: 0)
+  -ts, --tensor-split <ts0/ts1/..>    (default: 0)
   -r, --repetitions <n>               (default: 5)
   -o, --output <csv|json|md|sql>      (default: md)
   -v, --verbose                       (default: 0)
@@ -43,10 +48,11 @@ options:
 Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.
 ```
 
-llama-bench can perform two types of tests:
+llama-bench can perform three types of tests:
 
 - Prompt processing (pp): processing a prompt in batches (`-p`)
 - Text generation (tg): generating a sequence of tokens (`-n`)
+- Prompt processing + text generation (pg): processing a prompt followed by generating a sequence of tokens (`-pg`)
 
 With the exception of `-r`, `-o` and `-v`, all options can be specified multiple times to run multiple tests.
 Each pp and tg test is run with all combinations of the specified options. To specify multiple values for an option, the values can be separated by commas (e.g. `-n 16,32`), or the option can be specified multiple times (e.g. `-n 16 -n 32`).