From 36fed7af50c624655aee2218b275fbc6057b5c21 Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT <pierrick.hymbert@gmail.com> Date: Thu, 29 Feb 2024 10:38:36 +0100 Subject: [PATCH] remove: mul_mat_q in compare llama bench and usage --- examples/batched-bench/batched-bench.cpp | 4 ++-- scripts/compare-llama-bench.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp index 0b6f1314d..19aff18ae 100644 --- a/examples/batched-bench/batched-bench.cpp +++ b/examples/batched-bench/batched-bench.cpp @@ -32,9 +32,9 @@ int main(int argc, char ** argv) { gpt_params params; if (argc == 1 || argv[1][0] == '-') { - printf("usage: %s MODEL_PATH [N_KV_MAX] [IS_PP_SHARED] [NGL] [MMQ] <PP> <TG> <PL>\n" , argv[0]); + printf("usage: %s MODEL_PATH [N_KV_MAX] [IS_PP_SHARED] [NGL] <PP> <TG> <PL>\n" , argv[0]); printf(" <PP>, <TG> and <PL> are comma-separated lists of numbers without spaces\n\n"); - printf(" example: %s ggml-model-f16.gguf 2048 0 999 0 128,256,512 128,256 1,2,4,8,16,32\n\n", argv[0]); + printf(" example: %s ggml-model-f16.gguf 2048 0 999 128,256,512 128,256 1,2,4,8,16,32\n\n", argv[0]); return 1 ; } diff --git a/scripts/compare-llama-bench.py b/scripts/compare-llama-bench.py index 70737f976..39c3e52e5 100755 --- a/scripts/compare-llama-bench.py +++ b/scripts/compare-llama-bench.py @@ -31,7 +31,7 @@ PRETTY_NAMES = { "model_size": "Model Size [GiB]", "model_n_params": "Num. of Parameters", "n_batch": "Batch size", "n_threads": "Threads", "type_k": "K type", "type_v": "V type", "n_gpu_layers": "GPU layers", "main_gpu": "Main GPU", "no_kv_offload": "NKVO", - "mul_mat_q": "MMQ", "tensor_split": "Tensor split" + "tensor_split": "Tensor split" } DEFAULT_SHOW = ["model_type"] # Always show these properties by default.