From 49822bab15a584343ff3f01333beb378557215a6 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Thu, 16 Jan 2025 12:44:21 +0100 Subject: [PATCH] update --- src/llama-model-loader.cpp | 74 ++++++++++++++++++++------------------ src/llama-model-loader.h | 4 --- src/llama.cpp | 1 + 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp index 2977a3256..75073bf61 100644 --- a/src/llama-model-loader.cpp +++ b/src/llama-model-loader.cpp @@ -64,6 +64,33 @@ static std::string llama_model_ftype_name(llama_ftype ftype) { } } +// return a list of splits for a given path +// for example, given "-00002-of-00004.gguf", returns list of all 4 splits +static std::vector llama_get_list_splits(const std::string & path, const int idx, const int n_split) { + std::vector paths; + std::string split_prefix; + std::vector buf(llama_path_max(), 0); + + { + int ret = llama_split_prefix(buf.data(), buf.size(), path.c_str(), idx, n_split); + if (!ret) { + throw std::runtime_error(format("invalid split file name: %s", path.c_str())); + } + split_prefix = std::string(buf.data(), ret); + } + + if (split_prefix.empty()) { + throw std::runtime_error(format("invalid split file: %s", path.c_str())); + } + + for (int idx = 0; idx < n_split; ++idx) { + int ret = llama_split_path(buf.data(), buf.size(), split_prefix.c_str(), idx, n_split); + paths.push_back(std::string(buf.data(), ret)); + } + + return paths; +} + namespace GGUFMeta { template struct GKV_Base_Type { @@ -466,16 +493,7 @@ llama_model_loader::llama_model_loader( // Load additional GGML contexts if (n_split > 1) { - // generate list of splits if needed - if (splits.empty()) { - splits = llama_get_list_splits(fname, n_split); - } - - // in case user give a custom list of splits, check if it matches the expected number - if (n_split != (uint16_t)splits.size()) { - throw std::runtime_error(format("invalid split count, given: %zu splits, but expected %d", splits.size(), n_split)); - } - + // make sure the main file is loaded first uint16_t idx = 0; const std::string kv_split_no = llm_kv(LLM_KV_SPLIT_NO); get_key(kv_split_no, idx); @@ -483,10 +501,21 @@ llama_model_loader::llama_model_loader( throw std::runtime_error(format("illegal split file idx: %d (file: %s), model must be loaded with the first split", idx, fname.c_str())); } + // generate list of splits if needed + if (splits.empty()) { + splits = llama_get_list_splits(fname, idx, n_split); + } + + // in case user give a custom list of splits, check if it matches the expected number + if (n_split != (uint16_t)splits.size()) { + throw std::runtime_error(format("invalid split count, given: %zu splits, but expected %d", splits.size(), n_split)); + } + if (trace > 0) { LLAMA_LOG_INFO("%s: loading additional %d GGUFs\n", __func__, n_split); } + // load other splits for (idx = 1; idx < n_split; idx++) { const char * fname_split = splits[idx].c_str(); @@ -1093,28 +1122,3 @@ void llama_model_loader::print_info() const { LLAMA_LOG_INFO("%s: file size = %.2f GiB (%.2f BPW) \n", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements); } } - -std::vector llama_get_list_splits(const std::string & path, const int n_split) { - std::vector paths; - std::string split_prefix; - std::vector buf(llama_path_max(), 0); - - // brute force to find the split prefix - for (int idx = 0; idx < n_split; ++idx) { - int ret = llama_split_prefix(buf.data(), buf.size(), path.c_str(), idx, n_split); - if (ret) { - split_prefix = std::string(buf.data(), ret); - } - } - - if (split_prefix.empty()) { - throw std::runtime_error(format("invalid split file: %s", path.c_str())); - } - - for (int idx = 0; idx < n_split; ++idx) { - int ret = llama_split_path(buf.data(), buf.size(), split_prefix.c_str(), idx, n_split); - paths.push_back(std::string(buf.data(), ret)); - } - - return paths; -} diff --git a/src/llama-model-loader.h b/src/llama-model-loader.h index 4dfe1dced..fe35404b2 100644 --- a/src/llama-model-loader.h +++ b/src/llama-model-loader.h @@ -165,7 +165,3 @@ struct llama_model_loader { void print_info() const; }; - -// return a list of splits for a given path -// for example, given "-00002-of-00004.gguf", returns list of all 4 splits -std::vector llama_get_list_splits(const std::string & path, const int n_split); diff --git a/src/llama.cpp b/src/llama.cpp index eb2ec257e..fede23d19 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -9496,6 +9496,7 @@ static struct llama_model * llama_model_load_from_file_impl( return model; } +// deprecated struct llama_model * llama_load_model_from_file( const char * path_model, struct llama_model_params params) {