From 4fc8409a71857a2b3771716cbd6ec75f1da57f68 Mon Sep 17 00:00:00 2001
From: FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com>
Date: Fri, 15 Nov 2024 03:36:03 +1300
Subject: [PATCH] Update llama.cpp

---
 src/llama.cpp | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index bd41787bd..dc97ca50e 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -2907,12 +2907,15 @@ struct llama_model {
     // for quantize-stats only
     std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
 
-    uint64_t n_elements = 0;
-    size_t n_bytes = 0;
-
     int64_t t_load_us  = 0;
     int64_t t_start_us = 0;
 
+    // total number of parameters in the model
+    uint64_t n_elements = 0;
+
+    // total size of all the tensors in the model in bytes
+    size_t n_bytes = 0;
+
     // keep track of loaded lora adapters
     std::set<struct llama_lora_adapter *> lora_adapters;
 
@@ -4279,7 +4282,7 @@ struct llama_model_loader {
     int n_created = 0;
 
     uint64_t n_elements = 0;
-    size_t n_bytes = 0;
+    size_t   n_bytes    = 0;
 
     bool use_mmap = false;
     bool check_tensors;
@@ -5347,7 +5350,7 @@ static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
     }
 }
 
-static void llm_load_stats(llama_model_loader &ml, llama_model &model) {
+static void llm_load_stats(llama_model_loader & ml, llama_model & model) {
     model.n_elements = ml.n_elements;
     model.n_bytes = ml.n_bytes;
 }
@@ -19958,11 +19961,11 @@ int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t bu
             llama_model_ftype_name(model->ftype).c_str());
 }
 
-size_t llama_model_size(const struct llama_model *model) {
+size_t llama_model_size(const struct llama_model * model) {
     return model->n_bytes;
 }
 
-uint64_t llama_model_n_params(const struct llama_model *model) {
+uint64_t llama_model_n_params(const struct llama_model * model) {
     return model->n_elements;
 }
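
The two accessors touched above, llama_model_size() and llama_model_n_params(), are
part of the public API in llama.h; after this patch they return the totals cached in
llama_model by llm_load_stats() at load time. A minimal sketch of how a caller might
read them (the "model.gguf" path and the error handling are illustrative, not part of
the patch):

    #include "llama.h"
    #include <cstdio>

    int main() {
        llama_backend_init();

        // load a GGUF model with default parameters; llm_load_stats() runs
        // during loading and fills in model->n_elements / model->n_bytes
        struct llama_model_params params = llama_model_default_params();
        struct llama_model * model = llama_load_model_from_file("model.gguf", params);
        if (model == NULL) {
            fprintf(stderr, "failed to load model\n");
            return 1;
        }

        // both calls now just read the cached fields instead of walking tensors
        printf("size:   %zu bytes\n", llama_model_size(model));
        printf("params: %llu\n", (unsigned long long) llama_model_n_params(model));

        llama_free_model(model);
        llama_backend_free();
        return 0;
    }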