llama : make model stateless and context stateful (llama_state) (#1797)
* llama : make model stateless and context stateful
* llama : minor cleanup
* llama : update internal API declaration
* Apply suggestions from code review (fix style)
* Missing model memory release
* Fix style
* Add deprecated warning for public API function llama_init_from_file
* Update public API use cases: move away from deprecated llama_init_from_file
* Deprecate public API function llama_apply_lora_from_file

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
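For context on the API change this commit makes: the single llama_init_from_file call is split so that the model weights (stateless, shareable) are loaded separately from the inference context (stateful). Below is a minimal sketch of the new call sequence, assuming the signatures declared in llama.h as of this commit, where llama_load_model_from_file and llama_new_context_with_model both take llama_context_params; the model path is a placeholder.

    #include "llama.h"

    int main() {
        llama_context_params lparams = llama_context_default_params();

        // Load the model weights once; they hold no per-session state.
        llama_model * model = llama_load_model_from_file("models/7B/ggml-model.bin", lparams);
        if (model == nullptr) {
            return 1;
        }

        // Create a stateful context on top of the model
        // (several contexts could share one model).
        llama_context * ctx = llama_new_context_with_model(model, lparams);
        if (ctx == nullptr) {
            llama_free_model(model);
            return 1;
        }

        // ... evaluate tokens with ctx ...

        // Tear down in reverse order: the context first, then the model.
        llama_free(ctx);
        llama_free_model(model);
        return 0;
    }

The deprecated llama_init_from_file keeps working by performing both steps internally; existing callers only lose the ability to free or share the model independently of a context.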
Parent: d7b7484f74
Commit: 527b6fba1d
13 changed files with 244 additions and 92 deletions
examples/server/server.cpp (one of the 13 changed files):

@@ -115,6 +115,7 @@ struct llama_server_context {
     std::vector<llama_token> embd;
     std::vector<llama_token> last_n_tokens;
 
+    llama_model * model = nullptr;
     llama_context * ctx = nullptr;
     gpt_params params;
 
@@ -130,6 +131,10 @@ struct llama_server_context {
             llama_free(ctx);
             ctx = nullptr;
         }
+        if (model) {
+            llama_free_model(model);
+            model = nullptr;
+        }
     }
 
     void rewind() {
@@ -150,8 +155,8 @@ struct llama_server_context {
 
     bool loadModel(const gpt_params & params_) {
         params = params_;
-        ctx = llama_init_from_gpt_params(params);
-        if (ctx == nullptr) {
+        std::tie(model, ctx) = llama_init_from_gpt_params(params);
+        if (model == nullptr) {
             LOG_ERROR("unable to load model", { { "model", params_.model } });
             return false;
         }
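The loadModel hunk above unpacks both resources from the common helper with std::tie and checks only the model pointer. A short sketch of the same pattern in a standalone caller, under the assumption (consistent with this commit's change to the common helper) that llama_init_from_gpt_params returns a std::tuple<llama_model *, llama_context *> and yields {nullptr, nullptr} on any failure:

    #include <cstdio>
    #include <tuple>

    #include "common.h" // gpt_params, llama_init_from_gpt_params
    #include "llama.h"

    int run(const gpt_params & params) {
        llama_model * model = nullptr;
        llama_context * ctx = nullptr;

        // On failure the helper is assumed to clean up after itself and
        // return {nullptr, nullptr}, so checking model alone covers both.
        std::tie(model, ctx) = llama_init_from_gpt_params(params);
        if (model == nullptr) {
            fprintf(stderr, "unable to load model: %s\n", params.model.c_str());
            return 1;
        }

        // ... run inference with ctx ...

        // Mirror the destructor above: free the context before the model.
        llama_free(ctx);
        llama_free_model(model);
        return 0;
    }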