From 30c52f32b8c256e8f0b0bbcc0932d8463d8aebde Mon Sep 17 00:00:00 2001
From: jianyuzh
Date: Fri, 2 Feb 2024 00:18:36 +0800
Subject: [PATCH] mv position to reduce model reload

---
 examples/llama-bench/llama-bench.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index f385cc7c1..e36c061a2 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -534,12 +534,12 @@ static std::vector get_cmd_params_instances(const cmd_param
     for (const auto & sm : params.split_mode)
     for (const auto & mg : params.main_gpu)
     for (const auto & ts : params.tensor_split)
+    for (const auto & mmp : params.use_mmap)
     for (const auto & nb : params.n_batch)
     for (const auto & tk : params.type_k)
     for (const auto & tv : params.type_v)
     for (const auto & mmq : params.mul_mat_q)
     for (const auto & nkvo : params.no_kv_offload)
-    for (const auto & mmp : params.use_mmap)
     for (const auto & nt : params.n_threads) {
         for (const auto & n_prompt : params.n_prompt) {
             if (n_prompt == 0) {