common: allow the warmup to be disabled in llama_init_from_gpt_params
parent 52a8e0640a
commit 831c97efc7

4 changed files with 4 additions and 1 deletion
common/common.cpp

@@ -2194,7 +2194,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
         params.sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
     }
 
-    {
+    if (params.warmup) {
         LOG("warming up the model with an empty run\n");
 
         std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), };
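For context, the hunk above truncates the rest of the warmup block. A hedged sketch of how the guarded block reads after this change; the decode/cleanup tail is paraphrased from common.cpp of this era rather than shown in this commit, and the variable name lctx and the exact cleanup calls are assumptions that may differ at this revision:

    if (params.warmup) {
        LOG("warming up the model with an empty run\n");

        std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), };
        // assumption: the block decodes the BOS/EOS pair once, then wipes the
        // KV cache and timings so the warmup leaves no state behind
        llama_decode(lctx, llama_batch_get_one(tmp.data(), (int32_t) tmp.size(), 0, 0));
        llama_kv_cache_clear(lctx);
        llama_reset_timings(lctx);
    }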
common/common.h

@@ -159,6 +159,7 @@ struct gpt_params {
     bool infill           = false; // use infill mode
     bool dump_kv_cache    = false; // dump the KV cache contents for debugging purposes
     bool no_kv_offload    = false; // disable KV offloading
+    bool warmup           = true;  // warmup run
 
     std::string cache_type_k = "f16"; // KV cache data type for the K
     std::string cache_type_v = "f16"; // KV cache data type for the V
examples/eval-callback/eval-callback.cpp

@@ -136,6 +136,7 @@ int main(int argc, char ** argv) {
     // it will be executed for each node during the graph computation
     params.cb_eval = ggml_debug;
     params.cb_eval_user_data = &cb_data;
+    params.warmup = false;
 
     // init
     llama_model * model;
examples/imatrix/imatrix.cpp

@@ -601,6 +601,7 @@ int main(int argc, char ** argv) {
     // it will be executed for each node during the graph computation
     params.cb_eval = ik_collect_imatrix;
     params.cb_eval_user_data = NULL;
+    params.warmup = false;
 
     // init
     llama_model * model;
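Usage note: because the new field defaults to true, existing callers keep the warmup; only tools that opt out (like the two examples above) change behavior. Below is a minimal sketch, not part of this commit, of how a program built on common.h could disable the warmup after this change; it assumes the tuple-returning llama_init_from_gpt_params shown in the first hunk and the no-argument llama_backend_init present in the tree at this point:

    #include "common.h"
    #include "llama.h"

    #include <tuple>

    int main(int argc, char ** argv) {
        gpt_params params;
        if (!gpt_params_parse(argc, argv, params)) {
            return 1;
        }

        // new in this commit: skip the empty warmup decode that
        // llama_init_from_gpt_params performs when warmup == true (the default)
        params.warmup = false;

        llama_backend_init();

        llama_model * model;
        llama_context * ctx;
        std::tie(model, ctx) = llama_init_from_gpt_params(params);
        if (model == nullptr || ctx == nullptr) {
            return 1;
        }

        // ... run inference as usual ...

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_free();
        return 0;
    }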