diff --git a/src/llama-model.cpp b/src/llama-model.cpp index e30db66e9..0f4b62c43 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -1275,7 +1275,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) { const bool use_mmap_buffer = true; - LLAMA_LOG_INFO("%s: loading model tensors, this can take a while... (mmap = %s)\n", __func__, use_mmap_buffer ? "true" : "false"); + LLAMA_LOG_INFO("%s: loading model tensors, this can take a while... (mmap = %s)\n", __func__, ml.use_mmap ? "true" : "false"); // build a list of buffer types for the CPU and GPU devices pimpl->cpu_buft_list = make_cpu_buft_list(devices); diff --git a/src/llama.cpp b/src/llama.cpp index 3b6a21d81..607f27861 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -9430,7 +9430,6 @@ static struct llama_model * llama_model_load_from_file_impl( struct llama_model_params params) { ggml_time_init(); - unsigned cur_percentage = 0; if (params.progress_callback == NULL) { params.progress_callback_user_data = &cur_percentage;