common: allow the warmup to be disabled in llama_init_from_gpt_params
This commit is contained in:
parent
52a8e0640a
commit
831c97efc7
4 changed files with 4 additions and 1 deletion
|
@@ -2194,7 +2194,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
|
||||||
params.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
|
params.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
if (params.warmup) {
|
||||||
LOG("warming up the model with an empty run\n");
|
LOG("warming up the model with an empty run\n");
|
||||||
|
|
||||||
std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), };
|
std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), };
|
||||||
|
|
|
@@ -159,6 +159,7 @@ struct gpt_params {
|
||||||
bool infill = false; // use infill mode
|
bool infill = false; // use infill mode
|
||||||
bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
|
bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
|
||||||
bool no_kv_offload = false; // disable KV offloading
|
bool no_kv_offload = false; // disable KV offloading
|
||||||
|
bool warmup = true; // warmup run
|
||||||
|
|
||||||
std::string cache_type_k = "f16"; // KV cache data type for the K
|
std::string cache_type_k = "f16"; // KV cache data type for the K
|
||||||
std::string cache_type_v = "f16"; // KV cache data type for the V
|
std::string cache_type_v = "f16"; // KV cache data type for the V
|
||||||
|
|
|
@@ -136,6 +136,7 @@ int main(int argc, char ** argv) {
|
||||||
// it will be executed for each node during the graph computation
|
// it will be executed for each node during the graph computation
|
||||||
params.cb_eval = ggml_debug;
|
params.cb_eval = ggml_debug;
|
||||||
params.cb_eval_user_data = &cb_data;
|
params.cb_eval_user_data = &cb_data;
|
||||||
|
params.warmup = false;
|
||||||
|
|
||||||
// init
|
// init
|
||||||
llama_model * model;
|
llama_model * model;
|
||||||
|
|
|
@@ -601,6 +601,7 @@ int main(int argc, char ** argv) {
|
||||||
// it will be executed for each node during the graph computation
|
// it will be executed for each node during the graph computation
|
||||||
params.cb_eval = ik_collect_imatrix;
|
params.cb_eval = ik_collect_imatrix;
|
||||||
params.cb_eval_user_data = NULL;
|
params.cb_eval_user_data = NULL;
|
||||||
|
params.warmup = false;
|
||||||
|
|
||||||
// init
|
// init
|
||||||
llama_model * model;
|
llama_model * model;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue