From 0261d4f02f718eda03f48b3843e74d31584f9922 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Fri, 3 Jan 2025 14:37:28 +0200
Subject: [PATCH] llama : deprecate llama_free_model, add llama_model_free

ggml-ci
---
 common/common.cpp                          | 10 +++++-----
 examples/batched-bench/batched-bench.cpp   |  2 +-
 examples/batched/batched.cpp               |  2 +-
 examples/gritlm/gritlm.cpp                 |  2 +-
 examples/llama-bench/llama-bench.cpp       |  6 +++---
 examples/llava/llava-cli.cpp               |  4 ++--
 examples/llava/minicpmv-cli.cpp            |  2 +-
 examples/llava/qwen2vl-cli.cpp             |  4 ++--
 examples/passkey/passkey.cpp               |  2 +-
 examples/quantize-stats/quantize-stats.cpp |  6 +++---
 examples/simple-chat/simple-chat.cpp       |  2 +-
 examples/simple/simple.cpp                 |  2 +-
 examples/tokenize/tokenize.cpp             |  2 +-
 include/llama-cpp.h                        |  2 +-
 include/llama.h                            |  6 ++++--
 src/llama-model.cpp                        |  4 ++++
 src/llama.cpp                              | 10 +++++-----
 tests/test-autorelease.cpp                 |  2 +-
 tests/test-tokenizer-0.cpp                 |  4 ++--
 tests/test-tokenizer-1-bpe.cpp             |  4 ++--
 tests/test-tokenizer-1-spm.cpp             |  4 ++--
 21 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 4bb140ee2..98f96f940 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -873,7 +873,7 @@ struct common_init_result common_init_from_params(common_params & params) {
         }
 
         if (!ok) {
-            llama_free_model(model);
+            llama_model_free(model);
 
             return iparams;
         }
@@ -884,7 +884,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     llama_context * lctx = llama_new_context_with_model(model, cparams);
     if (lctx == NULL) {
         LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str());
-        llama_free_model(model);
+        llama_model_free(model);
         return iparams;
     }
 
@@ -900,7 +900,7 @@ struct common_init_result common_init_from_params(common_params & params) {
         const auto cvec = common_control_vector_load(params.control_vectors);
         if (cvec.n_embd == -1) {
             llama_free(lctx);
-            llama_free_model(model);
+            llama_model_free(model);
 
             return iparams;
         }
@@ -913,7 +913,7 @@ struct common_init_result common_init_from_params(common_params & params) {
                                              params.control_vector_layer_end);
         if (err) {
             llama_free(lctx);
-            llama_free_model(model);
+            llama_model_free(model);
 
             return iparams;
         }
@@ -926,7 +926,7 @@ struct common_init_result common_init_from_params(common_params & params) {
         if (lora == nullptr) {
             LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
             llama_free(lctx);
-            llama_free_model(model);
+            llama_model_free(model);
             return iparams;
         }
 
diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp
index a3b21ad6b..07fd28047 100644
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@@ -194,7 +194,7 @@ int main(int argc, char ** argv) {
     llama_batch_free(batch);
 
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);
 
     llama_backend_free();
 
diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp
index e2e01f2d5..d532e610e 100644
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@@ -236,7 +236,7 @@ int main(int argc, char ** argv) {
 
     llama_sampler_free(smpl);
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);
 
     llama_backend_free();
 
diff --git a/examples/gritlm/gritlm.cpp b/examples/gritlm/gritlm.cpp
index 18a945b33..fcf17d31d 100644
--- a/examples/gritlm/gritlm.cpp
+++ b/examples/gritlm/gritlm.cpp
@@ -219,7 +219,7 @@ int main(int argc, char * argv[]) {
 
     llama_sampler_free(smpl);
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);
     llama_backend_free();
 
     return 0;
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 2338ad106..228af536c 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -1526,7 +1526,7 @@ int main(int argc, char ** argv) {
         // keep the same model between tests when possible
         if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
             if (lmodel) {
-                llama_free_model(lmodel);
+                llama_model_free(lmodel);
             }
 
             lmodel = llama_load_model_from_file(inst.model.c_str(), inst.to_llama_mparams());
@@ -1540,7 +1540,7 @@ int main(int argc, char ** argv) {
         llama_context * ctx = llama_new_context_with_model(lmodel, inst.to_llama_cparams());
         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, inst.model.c_str());
-            llama_free_model(lmodel);
+            llama_model_free(lmodel);
             return 1;
         }
 
@@ -1626,7 +1626,7 @@ int main(int argc, char ** argv) {
         ggml_threadpool_free_fn(threadpool);
     }
 
-    llama_free_model(lmodel);
+    llama_model_free(lmodel);
 
     if (p) {
         p->print_footer();
diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
index 2691c6e6b..c09f46156 100644
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -265,7 +265,7 @@ static void llava_free(struct llava_context * ctx_llava) {
     }
 
     llama_free(ctx_llava->ctx_llama);
-    llama_free_model(ctx_llava->model);
+    llama_model_free(ctx_llava->model);
     llama_backend_free();
 }
 
@@ -323,7 +323,7 @@ int main(int argc, char ** argv) {
         }
     }
 
-    llama_free_model(model);
+    llama_model_free(model);
 
     return 0;
 }
diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp
index e9cbb51ed..b07e42dae 100644
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@@ -75,7 +75,7 @@ static void llava_free(struct llava_context * ctx_llava) {
     }
 
     llama_free(ctx_llava->ctx_llama);
-    llama_free_model(ctx_llava->model);
+    llama_model_free(ctx_llava->model);
     llama_backend_free();
 }
 
diff --git a/examples/llava/qwen2vl-cli.cpp b/examples/llava/qwen2vl-cli.cpp
index e86a60280..db8749f62 100644
--- a/examples/llava/qwen2vl-cli.cpp
+++ b/examples/llava/qwen2vl-cli.cpp
@@ -354,7 +354,7 @@ static void llava_free(struct llava_context * ctx_llava) {
     }
 
     llama_free(ctx_llava->ctx_llama);
-    llama_free_model(ctx_llava->model);
+    llama_model_free(ctx_llava->model);
     llama_backend_free();
 }
 
@@ -575,7 +575,7 @@ int main(int argc, char ** argv) {
         }
     }
 
-    llama_free_model(model);
+    llama_model_free(model);
 
     return 0;
 }
diff --git a/examples/passkey/passkey.cpp b/examples/passkey/passkey.cpp
index 09bba708f..59123890a 100644
--- a/examples/passkey/passkey.cpp
+++ b/examples/passkey/passkey.cpp
@@ -266,7 +266,7 @@ int main(int argc, char ** argv) {
     llama_batch_free(batch);
 
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);
 
     llama_backend_free();
 
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index ab91d0b40..8e89c4247 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -323,7 +323,7 @@ int main(int argc, char ** argv) {
 
         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, params.model.c_str());
-            llama_free_model(model);
+            llama_model_free(model);
             return 1;
         }
     }
@@ -347,7 +347,7 @@ int main(int argc, char ** argv) {
             fprintf(stderr, "%s: error: Quantization should be tested with a float model, "
                 "this model contains already quantized layers (%s is type %d)\n", __func__, kv_tensor.first.c_str(), kv_tensor.second->type);
             llama_free(ctx);
-            llama_free_model(model);
+            llama_model_free(model);
             return 1;
         }
         included_layers++;
@@ -409,7 +409,7 @@ int main(int argc, char ** argv) {
 
 
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);
     // report timing
     {
         const int64_t t_main_end_us = ggml_time_us();
diff --git a/examples/simple-chat/simple-chat.cpp b/examples/simple-chat/simple-chat.cpp
index 7f4da666b..50c89bd45 100644
--- a/examples/simple-chat/simple-chat.cpp
+++ b/examples/simple-chat/simple-chat.cpp
@@ -194,7 +194,7 @@ int main(int argc, char ** argv) {
     }
     llama_sampler_free(smpl);
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);
 
     return 0;
 }
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 3288c0250..15004f1f9 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -199,7 +199,7 @@ int main(int argc, char ** argv) {
 
     llama_sampler_free(smpl);
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);
 
     return 0;
 }
diff --git a/examples/tokenize/tokenize.cpp b/examples/tokenize/tokenize.cpp
index c97e22724..979d885ad 100644
--- a/examples/tokenize/tokenize.cpp
+++ b/examples/tokenize/tokenize.cpp
@@ -398,7 +398,7 @@ int main(int raw_argc, char ** raw_argv) {
     }
     // silence valgrind
     llama_free(ctx);
-    llama_free_model(model);
+    llama_model_free(model);
 
     return 0;
 }
diff --git a/include/llama-cpp.h b/include/llama-cpp.h
index 1500cb2fc..11306b17f 100644
--- a/include/llama-cpp.h
+++ b/include/llama-cpp.h
@@ -9,7 +9,7 @@
 #include "llama.h"
 
 struct llama_model_deleter {
-    void operator()(llama_model * model) { llama_free_model(model); }
+    void operator()(llama_model * model) { llama_model_free(model); }
 };
 
 struct llama_context_deleter {
diff --git a/include/llama.h b/include/llama.h
index 7b305b299..ed0d51b8c 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -417,8 +417,10 @@ extern "C" {
                              const char * path_model,
               struct llama_model_params   params);
 
-    // TODO: rename to llama_model_free
-    LLAMA_API void llama_free_model(struct llama_model * model);
+    DEPRECATED(LLAMA_API void llama_free_model(struct llama_model * model),
+            "use llama_model_free instead");
+
+    LLAMA_API void llama_model_free(struct llama_model * model);
 
     // TODO: rename to llama_init_from_model
     LLAMA_API struct llama_context * llama_new_context_with_model(
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index ace0ba262..747d87a1f 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1976,6 +1976,10 @@ struct llama_model_params llama_model_default_params() {
 }
 
 void llama_free_model(struct llama_model * model) {
+    llama_model_free(model);
+}
+
+void llama_model_free(struct llama_model * model) {
     delete model;
 }
 
diff --git a/src/llama.cpp b/src/llama.cpp
index d7110b90b..691a57105 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -11494,7 +11494,7 @@ struct llama_model * llama_load_model_from_file(
         ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
         if (!rpc_reg) {
             LLAMA_LOG_ERROR("%s: failed to find RPC backend\n", __func__);
-            llama_free_model(model);
+            llama_model_free(model);
             return nullptr;
         }
 
@@ -11502,7 +11502,7 @@ struct llama_model * llama_load_model_from_file(
         ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
         if (!ggml_backend_rpc_add_device_fn) {
             LLAMA_LOG_ERROR("%s: failed to find RPC device add function\n", __func__);
-            llama_free_model(model);
+            llama_model_free(model);
             return nullptr;
         }
 
@@ -11512,7 +11512,7 @@ struct llama_model * llama_load_model_from_file(
                 model->devices.push_back(dev);
             } else {
                 LLAMA_LOG_ERROR("%s: failed to add RPC device for server '%s'\n", __func__, server.c_str());
-                llama_free_model(model);
+                llama_model_free(model);
                 return nullptr;
             }
         }
@@ -11544,7 +11544,7 @@ struct llama_model * llama_load_model_from_file(
     if (params.split_mode == LLAMA_SPLIT_MODE_NONE) {
         if (params.main_gpu < 0 || params.main_gpu >= (int)model->devices.size()) {
             LLAMA_LOG_ERROR("%s: invalid value for main_gpu: %d (available devices: %d)\n", __func__, params.main_gpu, (int)model->devices.size());
-            llama_free_model(model);
+            llama_model_free(model);
             return nullptr;
         }
         ggml_backend_dev_t main_gpu = model->devices[params.main_gpu];
@@ -11567,7 +11567,7 @@ struct llama_model * llama_load_model_from_file(
             LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
         }
 
-        llama_free_model(model);
+        llama_model_free(model);
         return nullptr;
     }
 
diff --git a/tests/test-autorelease.cpp b/tests/test-autorelease.cpp
index 57fa00011..72e0b06ad 100644
--- a/tests/test-autorelease.cpp
+++ b/tests/test-autorelease.cpp
@@ -16,7 +16,7 @@ int main(int argc, char ** argv) {
         auto * model = llama_load_model_from_file(model_path, llama_model_default_params());
         auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
         llama_free(ctx);
-        llama_free_model(model);
+        llama_model_free(model);
         llama_backend_free();
     }).join();
 
diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp
index 0af85f002..e1aae5cb0 100644
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -165,7 +165,7 @@ int main(int argc, char **argv) {
 
         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            llama_free_model(model);
+            llama_model_free(model);
             return 1;
         }
     }
@@ -300,7 +300,7 @@ int main(int argc, char **argv) {
         fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());
     }
 
-    llama_free_model(model);
+    llama_model_free(model);
     llama_free(ctx);
 
     llama_backend_free();
diff --git a/tests/test-tokenizer-1-bpe.cpp b/tests/test-tokenizer-1-bpe.cpp
index 0ff7fc833..4b9a880c4 100644
--- a/tests/test-tokenizer-1-bpe.cpp
+++ b/tests/test-tokenizer-1-bpe.cpp
@@ -59,7 +59,7 @@ int main(int argc, char **argv) {
 
         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            llama_free_model(model);
+            llama_model_free(model);
             return 1;
         }
     }
@@ -143,7 +143,7 @@ int main(int argc, char **argv) {
         }
     }
 
-    llama_free_model(model);
+    llama_model_free(model);
     llama_free(ctx);
 
     llama_backend_free();
diff --git a/tests/test-tokenizer-1-spm.cpp b/tests/test-tokenizer-1-spm.cpp
index 9b0716a43..dcd8c39e7 100644
--- a/tests/test-tokenizer-1-spm.cpp
+++ b/tests/test-tokenizer-1-spm.cpp
@@ -47,7 +47,7 @@ int main(int argc, char ** argv) {
 
         if (ctx == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
-            llama_free_model(model);
+            llama_model_free(model);
             return 1;
         }
     }
@@ -113,7 +113,7 @@ int main(int argc, char ** argv) {
         }
     }
 
-    llama_free_model(model);
+    llama_model_free(model);
     llama_free(ctx);
 
     llama_backend_free();