Pass pointer to params in llama_init_from_file

Passing the params struct by value is problematic for foreign-language bindings: especially with the Go bindings, it has the side effect that the struct's values are not copied correctly. This has been observed with the bindings in https://github.com/go-skynet/go-llama.cpp/pull/105.
2023-06-16 23:43:36 +02:00 · 2023-06-16 23:43:36 +02:00 · 200892a3a5
commit 200892a3a5
parent d411968e99
7 changed files with 9 additions and 8 deletions
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -555,7 +555,7 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params) {
    lparams.logits_all   = params.perplexity;
    lparams.embedding    = params.embedding;
-    llama_context * lctx = llama_init_from_file(params.model.c_str(), lparams);
+    llama_context * lctx = llama_init_from_file(params.model.c_str(), &lparams);
    if (lctx == NULL) {
        fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -330,7 +330,7 @@ int main(int argc, char ** argv) {
        lparams.f16_kv     = false;
        lparams.use_mlock  = false;
-        ctx = llama_init_from_file(params.model.c_str(), lparams);
+        ctx = llama_init_from_file(params.model.c_str(), &lparams);
        if (ctx == NULL) {
            fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -35,7 +35,7 @@ int main(int argc, char ** argv) {
    auto last_n_tokens_data = std::vector<llama_token>(params.repeat_last_n, 0);
    // init
-    auto ctx = llama_init_from_file(params.model.c_str(), lparams);
+    auto ctx = llama_init_from_file(params.model.c_str(), &lparams);
    auto tokens = std::vector<llama_token>(params.n_ctx);
    auto n_prompt_tokens = llama_tokenize(ctx, params.prompt.c_str(), tokens.data(), int(tokens.size()), true);
@@ -95,7 +95,7 @@ int main(int argc, char ** argv) {
    llama_free(ctx);
    // load new model
-    auto ctx2 = llama_init_from_file(params.model.c_str(), lparams);
+    auto ctx2 = llama_init_from_file(params.model.c_str(), &lparams);
    // Load state (rng, logits, embedding and kv_cache) from file
    {
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -3054,7 +3054,7 @@ int main(int argc, char ** argv) {
    struct llama_context_params llama_params = llama_context_default_params();
    llama_params.vocab_only = true;
-    struct llama_context * lctx = llama_init_from_file(params.fn_vocab_model, llama_params);
+    struct llama_context * lctx = llama_init_from_file(params.fn_vocab_model, &llama_params);
    struct llama_vocab vocab;
    {
--- a/llama.cpp
+++ b/llama.cpp
@@ -2618,8 +2618,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 struct llama_context * llama_init_from_file(
                             const char * path_model,
-            struct llama_context_params   params) {
+         const struct llama_context_params *  params_ptr) {
    ggml_time_init();
+    struct llama_context_params params = *params_ptr;
    llama_context * ctx = new llama_context;
--- a/llama.h
+++ b/llama.h
@@ -142,7 +142,7 @@ extern "C" {
    // Return NULL on failure
    LLAMA_API struct llama_context * llama_init_from_file(
                             const char * path_model,
-            struct llama_context_params   params);
+            const struct llama_context_params * params);
    // Frees all allocated memory
    LLAMA_API void llama_free(struct llama_context * ctx);
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -36,7 +36,7 @@ int main(int argc, char **argv) {
        lparams.vocab_only = true;
-        ctx = llama_init_from_file(fname.c_str(), lparams);
+        ctx = llama_init_from_file(fname.c_str(), &lparams);
        if (ctx == NULL) {
            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());