From 831c97efc7480a5815832b395775f5219224d75a Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT <pierrick.hymbert@gmail.com>
Date: Wed, 10 Apr 2024 22:42:04 +0200
Subject: [PATCH] common: allow the warmup to be disabled in llama_init_from_gpt_params

---
 common/common.cpp                  | 2 +-
 common/common.h                    | 1 +
 examples/ggml-debug/ggml-debug.cpp | 1 +
 examples/imatrix/imatrix.cpp       | 1 +
 4 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/common/common.cpp b/common/common.cpp
index f3001b41f..dda514785 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2194,7 +2194,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         params.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
     }
 
-    {
+    if (params.warmup) {
         LOG("warming up the model with an empty run\n");
 
         std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), };
diff --git a/common/common.h b/common/common.h
index dafc7b1ce..65272b0ba 100644
--- a/common/common.h
+++ b/common/common.h
@@ -159,6 +159,7 @@ struct gpt_params {
     bool infill           = false; // use infill mode
     bool dump_kv_cache    = false; // dump the KV cache contents for debugging purposes
     bool no_kv_offload    = false; // disable KV offloading
+    bool warmup           = true;  // warmup run
 
     std::string cache_type_k = "f16"; // KV cache data type for the K
     std::string cache_type_v = "f16"; // KV cache data type for the V
diff --git a/examples/ggml-debug/ggml-debug.cpp b/examples/ggml-debug/ggml-debug.cpp
index 6d7bd2129..aa2df8c64 100644
--- a/examples/ggml-debug/ggml-debug.cpp
+++ b/examples/ggml-debug/ggml-debug.cpp
@@ -136,6 +136,7 @@ int main(int argc, char ** argv) {
     // it will be executed for each node during the graph computation
     params.cb_eval = ggml_debug;
     params.cb_eval_user_data = &cb_data;
+    params.warmup = false;
 
     // init
     llama_model * model;
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 10c0f08a2..ff624c539 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -601,6 +601,7 @@ int main(int argc, char ** argv) {
     // it will be executed for each node during the graph computation
     params.cb_eval = ik_collect_imatrix;
     params.cb_eval_user_data = NULL;
+    params.warmup = false;
 
     // init
     llama_model * model;