fix llama_reset_model_time

2024-09-19 11:30:47 +08:00 · 2024-09-19 11:30:47 +08:00 · 216e7d9648
commit 216e7d9648
parent 24bea1549b
3 changed files with 7 additions and 7 deletions
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@ -1558,7 +1558,8 @@ int main(int argc, char ** argv) {
            }
            prev_inst = &inst;
        } else {
-            llama_model_reset_time(lmodel);
+            // ensure load_time does not accumulate in llama_bench when not loading the same model
            llama_reset_model_time(lmodel);
        }
        llama_context * ctx = llama_new_context_with_model(lmodel, inst.to_llama_cparams());
--- a/include/llama.h
+++ b/include/llama.h
@ -414,7 +414,7 @@ extern "C" {
                             const char * path_model,
              struct llama_model_params   params);
-    LLAMA_API void llama_model_reset_time(struct llama_model * model);
+    LLAMA_API void llama_reset_model_time(struct llama_model * model);
    LLAMA_API void llama_free_model(struct llama_model * model);
--- a/src/llama.cpp
+++ b/src/llama.cpp
@ -8809,11 +8809,6 @@ static bool llm_load_tensors(
    return true;
 }
 // Restart the model's timing bookkeeping from the current moment.
 // t_start_us is overwritten with the current ggml_time_us() reading, and
 // t_load_us is then set to the difference between a second ggml_time_us()
 // reading and that new start value, so the previously recorded load time is
 // discarded and replaced by a near-zero interval.
 // `model` is dereferenced without a null check.
 // NOTE(review): presumably both fields are microsecond values, going by the
 // "_us" suffix - confirm against ggml_time_us().
 void llama_model_reset_time(llama_model * model) {
    model->t_start_us = ggml_time_us();
    model->t_load_us = ggml_time_us() - model->t_start_us;
 }
 // Returns 0 on success, -1 on error, and -2 on cancellation via llama_progress_callback
 static int llama_model_load(const std::string & fname, llama_model & model, llama_model_params & params) {
    model.t_start_us = ggml_time_us();
@ -18695,6 +18690,10 @@ struct llama_model * llama_load_model_from_file(
    return model;
 }
 // Rebase the model's start timestamp while keeping its recorded load time.
 // t_start_us is set to the current ggml_time_us() reading minus t_load_us, so
 // at the moment of the call the time elapsed since t_start_us equals the
 // stored load duration. t_load_us itself is not modified.
 // `model` is dereferenced without a null check.
 void llama_reset_model_time(llama_model * model) {
    model->t_start_us = ggml_time_us() - model->t_load_us;
 }
 // Destroy a model object with a delete-expression. Passing a null pointer is
 // a no-op, as with any `delete`.
 // NOTE(review): assumes the model was allocated with `new`; the allocation
 // site is not visible here - confirm.
 void llama_free_model(struct llama_model * model) {
    delete model;
 }