diff --git a/examples/llama-bench/README.md b/examples/llama-bench/README.md index d02824bfa..374e40a7d 100644 --- a/examples/llama-bench/README.md +++ b/examples/llama-bench/README.md @@ -23,19 +23,23 @@ usage: ./llama-bench [options] options: -h, --help - -m, --model (default: models/7B/ggml-model-q4_0.gguf) - -p, --n-prompt (default: 512) - -n, --n-gen (default: 128) - -b, --batch-size (default: 512) - --memory-f32 <0|1> (default: 0) - -t, --threads (default: 16) - -ngl N, --n-gpu-layers (default: 99) - -mg i, --main-gpu (default: 0) - -mmq, --mul-mat-q <0|1> (default: 1) - -ts, --tensor_split - -r, --repetitions (default: 5) - -o, --output (default: md) - -v, --verbose (default: 0) + -m, --model (default: models/7B/ggml-model-q4_0.gguf) + -p, --n-prompt (default: 512) + -n, --n-gen (default: 128) + -b, --batch-size (default: 512) + -ctk , --cache-type-k (default: f16) + -ctv , --cache-type-v (default: f16) + -t, --threads (default: 112) + -ngl, --n-gpu-layers (default: 99) + -sm, --split-mode (default: layer) + -mg, --main-gpu (default: 0) + -nkvo, --no-kv-offload <0|1> (default: 0) + -mmp, --mmap <0|1> (default: 1) + -mmq, --mul-mat-q <0|1> (default: 1) + -ts, --tensor_split (default: 0) + -r, --repetitions (default: 5) + -o, --output (default: md) + -v, --verbose (default: 0) Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times. ``` @@ -51,6 +55,10 @@ Each test is repeated the number of times given by `-r`, and the results are ave For a description of the other options, see the [main example](../main/README.md). +Note: + +- When using SYCL backend, there would be hang issue in some cases. Please set `--mmp 0`. + ## Examples ### Text generation with different models