llama : make model stateless and context stateful (llama_state) (#1797)

* llama : make model stateless and context stateful
* llama : minor cleanup
* llama : update internal API declaration
* Apply suggestions from code review (fix style)
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Missing model memory release
* Fix style
* Add deprecated warning for public API function llama_init_from_file
* Update public API use cases: move away from deprecated llama_init_from_file
* Deprecate public API function llama_apply_lora_from_file

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2023-06-24 11:47:58 +03:00 · 2023-06-24 11:47:58 +03:00 · 527b6fba1d
commit 527b6fba1d
parent d7b7484f74
13 changed files with 244 additions and 92 deletions
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -28,6 +28,7 @@ int main(int argc, char **argv) {

    fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());

+    llama_model * model;
    llama_context * ctx;

    // load the vocab
@@ -36,10 +37,18 @@ int main(int argc, char **argv) {

        lparams.vocab_only = true;

-        ctx = llama_init_from_file(fname.c_str(), lparams);
+        model = llama_load_model_from_file(fname.c_str(), lparams);
+
+        if (model == NULL) {
+            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+            return 1;
+        }
+
+        ctx = llama_new_context_with_model(model, lparams);

        if (ctx == NULL) {
            fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
+            llama_free_model(model);
            return 1;
        }
    }
@@ -48,6 +57,8 @@ int main(int argc, char **argv) {

    if (n_vocab != 32000) {
        fprintf(stderr, "%s : expected 32000 tokens, got %d\n", __func__, n_vocab);
+        llama_free_model(model);
+        llama_free(ctx);
        return 2;
    }

@@ -77,10 +88,13 @@ int main(int argc, char **argv) {
            }
            fprintf(stderr, "\n");

+            llama_free_model(model);
+            llama_free(ctx);
            return 3;
        }
    }

+    llama_free_model(model);
    llama_free(ctx);

    return 0;