diff --git a/examples/common.cpp b/examples/common.cpp
index 055383bef..7cf48e821 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -555,7 +555,7 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params) {
     lparams.logits_all = params.perplexity;
     lparams.embedding  = params.embedding;
 
-    llama_context * lctx = llama_init_from_file(params.model.c_str(), lparams);
+    llama_context * lctx = llama_init_from_file(params.model.c_str(), &lparams);
 
     if (lctx == NULL) {
         fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 6b8018ee2..a7c1e873a 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -330,7 +330,7 @@ int main(int argc, char ** argv) {
     lparams.f16_kv    = false;
     lparams.use_mlock = false;
 
-    ctx = llama_init_from_file(params.model.c_str(), lparams);
+    ctx = llama_init_from_file(params.model.c_str(), &lparams);
 
     if (ctx == NULL) {
         fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index da4d37ad0..07ee6750d 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -35,7 +35,7 @@ int main(int argc, char ** argv) {
     auto last_n_tokens_data = std::vector<llama_token>(params.repeat_last_n, 0);
 
     // init
-    auto ctx = llama_init_from_file(params.model.c_str(), lparams);
+    auto ctx = llama_init_from_file(params.model.c_str(), &lparams);
     auto tokens = std::vector<llama_token>(params.n_ctx);
     auto n_prompt_tokens = llama_tokenize(ctx, params.prompt.c_str(), tokens.data(), int(tokens.size()), true);
 
@@ -95,7 +95,7 @@ int main(int argc, char ** argv) {
     llama_free(ctx);
 
     // load new model
-    auto ctx2 = llama_init_from_file(params.model.c_str(), lparams);
+    auto ctx2 = llama_init_from_file(params.model.c_str(), &lparams);
 
     // Load state (rng, logits, embedding and kv_cache) from file
     {
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 7ec85951a..1c7a06c21 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -3054,7 +3054,7 @@ int main(int argc, char ** argv) {
     struct llama_context_params llama_params = llama_context_default_params();
     llama_params.vocab_only = true;
 
-    struct llama_context * lctx = llama_init_from_file(params.fn_vocab_model, llama_params);
+    struct llama_context * lctx = llama_init_from_file(params.fn_vocab_model, &llama_params);
 
     struct llama_vocab vocab;
     {
diff --git a/llama.cpp b/llama.cpp
index 81f047ed2..0629e8738 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2618,8 +2618,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
 struct llama_context * llama_init_from_file(
         const char * path_model,
-        struct llama_context_params params) {
+        const struct llama_context_params * params_ptr) {
     ggml_time_init();
+    struct llama_context_params params = *params_ptr;
 
     llama_context * ctx = new llama_context;
 
diff --git a/llama.h b/llama.h
index 1241ba6c0..faf2675f1 100644
--- a/llama.h
+++ b/llama.h
@@ -142,7 +142,7 @@ extern "C" {
     // Return NULL on failure
     LLAMA_API struct llama_context * llama_init_from_file(
             const char * path_model,
-            struct llama_context_params params);
+            const struct llama_context_params * params);
 
     // Frees all allocated memory
     LLAMA_API void llama_free(struct llama_context * ctx);
diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp
index ab1538a0c..b405df8e6 100644
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -36,7 +36,7 @@ int main(int argc, char **argv) {
 
     lparams.vocab_only = true;
 
-    ctx = llama_init_from_file(fname.c_str(), lparams);
+    ctx = llama_init_from_file(fname.c_str(), &lparams);
 
     if (ctx == NULL) {
         fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());