Save the number of parameters and the model size in bytes in llama_model

https://github.com/ggerganov/llama.cpp/issues/10285
2024-11-14 20:40:14 +13:00 · 2024-11-14 20:40:14 +13:00 · 0a0f91df61
commit 0a0f91df61
parent af148c9386
1 changed files with 15 additions and 12 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@ -2907,6 +2907,9 @@ struct llama_model {
    // for quantize-stats only
    std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;

+    uint64_t n_bytes = 0;
+    uint64_t n_elements = 0;
+
    int64_t t_load_us = 0;
    int64_t t_start_us = 0;

@ -5344,6 +5347,11 @@ static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
    }
 }

+static void llm_load_stats(llama_model_loader & ml, llama_model & model) { // cache the loader's totals on the model
+    model.n_elements = ml.n_elements; // element count, returned by llama_model_n_params()
+    model.n_bytes = ml.n_bytes;       // byte count, returned by llama_model_size()
+}
+
 static void llm_load_arch(llama_model_loader & ml, llama_model & model) {
    model.arch = ml.get_arch();
    if (model.arch == LLM_ARCH_UNKNOWN) {
@ -9252,6 +9260,8 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
            throw std::runtime_error("error loading model vocabulary: " + std::string(e.what()));
        }

+        llm_load_stats(ml, model);
+
        llm_load_print_meta(ml, model);

        if (model.vocab.type != LLAMA_VOCAB_TYPE_NONE &&
@ -18597,6 +18607,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
    llama_model model;
    llm_load_arch(ml, model);
    llm_load_hparams(ml, model);
+    llm_load_stats(ml, model);

    struct quantize_state_internal qs(model, params);

@ -19948,20 +19959,12 @@ int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t bu
            llama_model_ftype_name(model->ftype).c_str());
 }

-uint64_t llama_model_size(const struct llama_model * model) {
-    uint64_t size = 0;
-    for (const auto & it : model->tensors_by_name) {
-        size += ggml_nbytes(it.second);
-    }
-    return size;
+uint64_t llama_model_size(const struct llama_model * model) {
+    return model->n_bytes; // byte total cached by llm_load_stats() from the model loader
 }

-uint64_t llama_model_n_params(const struct llama_model * model) {
-    uint64_t nparams = 0;
-    for (const auto & it : model->tensors_by_name) {
-        nparams += ggml_nelements(it.second);
-    }
-    return nparams;
+uint64_t llama_model_n_params(const struct llama_model * model) {
+    return model->n_elements; // element total cached by llm_load_stats() from the model loader
 }

 struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name) {