From 831c97efc7480a5815832b395775f5219224d75a Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT <pierrick.hymbert@gmail.com>
Date: Wed, 10 Apr 2024 22:42:04 +0200
Subject: [PATCH] common: allow the warmup to be disabled in llama_init_from_gpt_params

---
 common/common.cpp                  | 2 +-
 common/common.h                    | 1 +
 examples/ggml-debug/ggml-debug.cpp | 1 +
 examples/imatrix/imatrix.cpp       | 1 +
 4 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/common/common.cpp b/common/common.cpp
index f3001b41f..dda514785 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2194,7 +2194,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         params.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
     }
 
-    {
+    if (params.warmup) {
         LOG("warming up the model with an empty run\n");
 
         std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), };
diff --git a/common/common.h b/common/common.h
index dafc7b1ce..65272b0ba 100644
--- a/common/common.h
+++ b/common/common.h
@@ -159,6 +159,7 @@ struct gpt_params {
     bool infill           = false; // use infill mode
     bool dump_kv_cache    = false; // dump the KV cache contents for debugging purposes
     bool no_kv_offload    = false; // disable KV offloading
+    bool warmup           = true;  // warmup run
 
     std::string cache_type_k = "f16"; // KV cache data type for the K
     std::string cache_type_v = "f16"; // KV cache data type for the V
diff --git a/examples/ggml-debug/ggml-debug.cpp b/examples/ggml-debug/ggml-debug.cpp
index 6d7bd2129..aa2df8c64 100644
--- a/examples/ggml-debug/ggml-debug.cpp
+++ b/examples/ggml-debug/ggml-debug.cpp
@@ -136,6 +136,7 @@ int main(int argc, char ** argv) {
     // it will be executed for each node during the graph computation
     params.cb_eval = ggml_debug;
     params.cb_eval_user_data = &cb_data;
+    params.warmup = false;
 
     // init
     llama_model * model;
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 10c0f08a2..ff624c539 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -601,6 +601,7 @@ int main(int argc, char ** argv) {
     // it will be executed for each node during the graph computation
     params.cb_eval = ik_collect_imatrix;
     params.cb_eval_user_data = NULL;
+    params.warmup = false;
 
     // init
     llama_model * model;