From 138255e7614cb20545dfa6e1874bc4a3cdd77ad9 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Fri, 3 Jan 2025 14:42:28 +0200
Subject: [PATCH] llama : change `llama_load_model_from_file` ->
 `llama_model_load_from_file`

ggml-ci
---
 common/common.cpp                          | 4 ++--
 examples/batched-bench/batched-bench.cpp   | 2 +-
 examples/batched/batched.cpp               | 2 +-
 examples/gritlm/gritlm.cpp                 | 2 +-
 examples/llama-bench/llama-bench.cpp       | 2 +-
 examples/llava/llava-cli.cpp               | 2 +-
 examples/llava/minicpmv-cli.cpp            | 2 +-
 examples/llava/qwen2vl-cli.cpp             | 2 +-
 examples/passkey/passkey.cpp               | 2 +-
 examples/quantize-stats/quantize-stats.cpp | 2 +-
 examples/run/run.cpp                       | 2 +-
 examples/simple-chat/simple-chat.cpp       | 2 +-
 examples/simple/simple.cpp                 | 2 +-
 examples/tokenize/tokenize.cpp             | 2 +-
 include/llama.h                            | 7 ++++++-
 src/llama.cpp                              | 6 ++++++
 tests/test-autorelease.cpp                 | 2 +-
 tests/test-model-load-cancel.cpp           | 2 +-
 tests/test-tokenizer-0.cpp                 | 2 +-
 tests/test-tokenizer-1-bpe.cpp             | 2 +-
 tests/test-tokenizer-1-spm.cpp             | 2 +-
 21 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index 98f96f940..2b6c01b0f 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -846,7 +846,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     } else if (!params.model_url.empty()) {
         model = common_load_model_from_url(params.model_url, params.model, params.hf_token, mparams);
     } else {
-        model = llama_load_model_from_file(params.model.c_str(), mparams);
+        model = llama_model_load_from_file(params.model.c_str(), mparams);
     }
 
     if (model == NULL) {
@@ -1411,7 +1411,7 @@ struct llama_model * common_load_model_from_url(
         }
     }
 
-    return llama_load_model_from_file(local_path.c_str(), params);
+    return llama_model_load_from_file(local_path.c_str(), params);
 }
 
 struct llama_model * common_load_model_from_hf(
diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp
index 07fd28047..dd75ff9f1 100644
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@@ -38,7 +38,7 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = common_model_params_to_llama(params);
 
-    llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);
 
     if (model == NULL) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp
index d532e610e..ea9e9274c 100644
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@@ -41,7 +41,7 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = common_model_params_to_llama(params);
 
-    llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);
 
     if (model == NULL) {
         LOG_ERR("%s: error: unable to load model\n" , __func__);
diff --git a/examples/gritlm/gritlm.cpp b/examples/gritlm/gritlm.cpp
index fcf17d31d..4d2db5624 100644
--- a/examples/gritlm/gritlm.cpp
+++ b/examples/gritlm/gritlm.cpp
@@ -165,7 +165,7 @@ int main(int argc, char * argv[]) {
 
     llama_backend_init();
 
-    llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
+    llama_model * model = llama_model_load_from_file(params.model.c_str(), mparams);
 
     // create generation context
     llama_context * ctx = llama_new_context_with_model(model, cparams);
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 228af536c..2a0916766 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -1529,7 +1529,7 @@ int main(int argc, char ** argv) {
                 llama_model_free(lmodel);
             }
 
-            lmodel = llama_load_model_from_file(inst.model.c_str(), inst.to_llama_mparams());
+            lmodel = llama_model_load_from_file(inst.model.c_str(), inst.to_llama_mparams());
             if (lmodel == NULL) {
                 fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, inst.model.c_str());
                 return 1;
diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
index c09f46156..27215a42e 100644
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -221,7 +221,7 @@ static struct llama_model * llava_init(common_params * params) {
 
     llama_model_params model_params = common_model_params_to_llama(*params);
 
-    llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
diff --git a/examples/llava/minicpmv-cli.cpp b/examples/llava/minicpmv-cli.cpp
index b07e42dae..2342bdd09 100644
--- a/examples/llava/minicpmv-cli.cpp
+++ b/examples/llava/minicpmv-cli.cpp
@@ -31,7 +31,7 @@ static struct llama_model * llava_init(common_params * params) {
 
     llama_model_params model_params = common_model_params_to_llama(*params);
 
-    llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
diff --git a/examples/llava/qwen2vl-cli.cpp b/examples/llava/qwen2vl-cli.cpp
index db8749f62..f3e5d66e2 100644
--- a/examples/llava/qwen2vl-cli.cpp
+++ b/examples/llava/qwen2vl-cli.cpp
@@ -310,7 +310,7 @@ static struct llama_model * llava_init(common_params * params) {
 
     llama_model_params model_params = common_model_params_to_llama(*params);
 
-    llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params->model.c_str(), model_params);
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
         return NULL;
diff --git a/examples/passkey/passkey.cpp b/examples/passkey/passkey.cpp
index 59123890a..ea91f376c 100644
--- a/examples/passkey/passkey.cpp
+++ b/examples/passkey/passkey.cpp
@@ -63,7 +63,7 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = common_model_params_to_llama(params);
 
-    llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(params.model.c_str(), model_params);
 
     if (model == NULL) {
         LOG_ERR("%s: unable to load model\n" , __func__);
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 8e89c4247..9bfbb8862 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -309,7 +309,7 @@ int main(int argc, char ** argv) {
         auto mparams = llama_model_default_params();
         mparams.use_mlock = false;
 
-        model = llama_load_model_from_file(params.model.c_str(), mparams);
+        model = llama_model_load_from_file(params.model.c_str(), mparams);
 
         if (model == NULL) {
             fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
diff --git a/examples/run/run.cpp b/examples/run/run.cpp
index 75b817272..c52a7961f 100644
--- a/examples/run/run.cpp
+++ b/examples/run/run.cpp
@@ -664,7 +664,7 @@ class LlamaData {
             "\r%*s"
             "\rLoading model",
             get_terminal_width(), " ");
-        llama_model_ptr model(llama_load_model_from_file(opt.model_.c_str(), opt.model_params));
+        llama_model_ptr model(llama_model_load_from_file(opt.model_.c_str(), opt.model_params));
         if (!model) {
            printe("%s: error: unable to load model from file: %s\n", __func__, opt.model_.c_str());
         }
diff --git a/examples/simple-chat/simple-chat.cpp b/examples/simple-chat/simple-chat.cpp
index 50c89bd45..d72f5bcdd 100644
--- a/examples/simple-chat/simple-chat.cpp
+++ b/examples/simple-chat/simple-chat.cpp
@@ -69,7 +69,7 @@ int main(int argc, char ** argv) {
     llama_model_params model_params = llama_model_default_params();
     model_params.n_gpu_layers = ngl;
 
-    llama_model * model = llama_load_model_from_file(model_path.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params);
     if (!model) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
         return 1;
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 15004f1f9..f69117890 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -83,7 +83,7 @@ int main(int argc, char ** argv) {
     llama_model_params model_params = llama_model_default_params();
     model_params.n_gpu_layers = ngl;
 
-    llama_model * model = llama_load_model_from_file(model_path.c_str(), model_params);
+    llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params);
 
     if (model == NULL) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
diff --git a/examples/tokenize/tokenize.cpp b/examples/tokenize/tokenize.cpp
index 979d885ad..e0422322d 100644
--- a/examples/tokenize/tokenize.cpp
+++ b/examples/tokenize/tokenize.cpp
@@ -333,7 +333,7 @@ int main(int raw_argc, char ** raw_argv) {
 
     llama_model_params model_params = llama_model_default_params();
     model_params.vocab_only = true;
-    llama_model * model = llama_load_model_from_file(model_path, model_params);
+    llama_model * model = llama_model_load_from_file(model_path, model_params);
     if (!model) {
         fprintf(stderr, "Error: could not load model from file '%s'.\n", model_path);
         return 1;
diff --git a/include/llama.h b/include/llama.h
index ed0d51b8c..592d67b84 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -413,7 +413,12 @@ extern "C" {
     // Call once at the end of the program - currently only used for MPI
     LLAMA_API void llama_backend_free(void);
 
-    LLAMA_API struct llama_model * llama_load_model_from_file(
+    DEPRECATED(LLAMA_API struct llama_model * llama_load_model_from_file(
+                             const char * path_model,
+                             struct llama_model_params params),
+            "use llama_model_load_from_file instead");
+
+    LLAMA_API struct llama_model * llama_model_load_from_file(
                              const char * path_model,
                              struct llama_model_params params);
 
diff --git a/src/llama.cpp b/src/llama.cpp
index 691a57105..2b6890b2c 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -11456,6 +11456,12 @@ int64_t llama_time_us(void) {
 struct llama_model * llama_load_model_from_file(
         const char * path_model,
         struct llama_model_params params) {
+    return llama_model_load_from_file(path_model, params);
+}
+
+struct llama_model * llama_model_load_from_file(
+        const char * path_model,
+        struct llama_model_params params) {
     ggml_time_init();
 
     llama_model * model = new llama_model;
diff --git a/tests/test-autorelease.cpp b/tests/test-autorelease.cpp
index 72e0b06ad..ba084a91a 100644
--- a/tests/test-autorelease.cpp
+++ b/tests/test-autorelease.cpp
@@ -13,7 +13,7 @@ int main(int argc, char ** argv) {
 
     std::thread([&model_path]() {
         llama_backend_init();
-        auto * model = llama_load_model_from_file(model_path, llama_model_default_params());
+        auto * model = llama_model_load_from_file(model_path, llama_model_default_params());
         auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
         llama_free(ctx);
         llama_model_free(model);
diff --git a/tests/test-model-load-cancel.cpp b/tests/test-model-load-cancel.cpp
index 858535c3c..9095826fa 100644
--- a/tests/test-model-load-cancel.cpp
+++ b/tests/test-model-load-cancel.cpp
@@ -21,7 +21,7 @@ int main(int argc, char *argv[] ) {
         (void) ctx;
         return progress > 0.50;
     };
-    auto * model = llama_load_model_from_file(model_path, params);
+    auto * model = llama_model_load_from_file(model_path, params);
     llama_backend_free();
     return model == nullptr ? EXIT_SUCCESS : EXIT_FAILURE;
 }
diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp
index e1aae5cb0..121c2c60c 100644
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -152,7 +152,7 @@ int main(int argc, char **argv) {
 
         mparams.vocab_only = true;
 
-        model = llama_load_model_from_file(fname.c_str(), mparams);
+        model = llama_model_load_from_file(fname.c_str(), mparams);
 
         if (model == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
diff --git a/tests/test-tokenizer-1-bpe.cpp b/tests/test-tokenizer-1-bpe.cpp
index 4b9a880c4..5718fab04 100644
--- a/tests/test-tokenizer-1-bpe.cpp
+++ b/tests/test-tokenizer-1-bpe.cpp
@@ -46,7 +46,7 @@ int main(int argc, char **argv) {
 
         mparams.vocab_only = true;
 
-        model = llama_load_model_from_file(fname.c_str(), mparams);
+        model = llama_model_load_from_file(fname.c_str(), mparams);
 
         if (model == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
diff --git a/tests/test-tokenizer-1-spm.cpp b/tests/test-tokenizer-1-spm.cpp
index dcd8c39e7..ac05387c9 100644
--- a/tests/test-tokenizer-1-spm.cpp
+++ b/tests/test-tokenizer-1-spm.cpp
@@ -34,7 +34,7 @@ int main(int argc, char ** argv) {
 
         mparams.vocab_only = true;
 
-        model = llama_load_model_from_file(fname.c_str(), mparams);
+        model = llama_model_load_from_file(fname.c_str(), mparams);
 
         if (model == NULL) {
             fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
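
Note for reviewers: a minimal caller-side sketch of the migration, not part of the patch. The file name and argument handling below are illustrative only; it uses just the symbols visible in the diff above (llama_backend_init/free, llama_model_default_params, llama_model_free, and the renamed loader). The old llama_load_model_from_file remains available but is marked DEPRECATED and forwards to the new entry point.

// migrate-example.cpp -- hypothetical standalone example, not part of the patch
#include "llama.h"

#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <path-to-model.gguf>\n", argv[0]);
        return 1;
    }

    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();

    // old call (still compiles, now emits a deprecation warning and forwards):
    //   llama_model * model = llama_load_model_from_file(argv[1], mparams);
    // new call:
    llama_model * model = llama_model_load_from_file(argv[1], mparams);
    if (model == NULL) {
        fprintf(stderr, "%s: error: unable to load model\n", __func__);
        llama_backend_free();
        return 1;
    }

    llama_model_free(model);
    llama_backend_free();

    return 0;
}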