diff --git a/llama.cpp b/llama.cpp
index 0124e424f..0c9d3f745 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2435,6 +2435,10 @@ struct llama_model_loader {
             mapping->unmap(0, mmap_first);
             mapping->unmap(mmap_last, mapping->size - mmap_last);
         }
+
+        if (progress_callback) {
+            progress_callback(1.0f, progress_callback_user_data);
+        }
     }
 };
 
@@ -3691,10 +3695,6 @@ static void llm_load_tensors(
 
     ml.load_all_data(ctx, progress_callback, progress_callback_user_data, buf_mmap, use_mlock ? &model.mlock_mmap : NULL);
 
-    if (progress_callback) {
-        progress_callback(1.0f, progress_callback_user_data);
-    }
-
     model.mapping = std::move(ml.mapping);
 
     // loading time will be recalculate after the first eval, so