From c39d5ecd2bd88e5b2e660eb56994b38c1edc666c Mon Sep 17 00:00:00 2001
From: Markus Tavenrath
Date: Thu, 13 Jun 2024 15:55:23 +0200
Subject: [PATCH] Apply suggestions from code review

Co-authored-by: slaren
---
 llama.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index a6eb79c99..df4944595 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3851,7 +3851,7 @@ struct llama_model_loader {
         size_t buffer_idx = 0; // buffer to use for async loads
 
         ggml_backend_t cuda_backend = nullptr;
-        if (!use_mmap) {
+        if (!use_mmap && !check_tensors) {
             // When not using mmaped io use async uploads from pinned memory to GPU memory.
             // First determine if the CUDA backend is active, and if so, determine the device ID.
             ggml_backend_buffer_t buf = bufs_mmap.count(0) ? bufs_mmap.at(0) : nullptr;
@@ -3939,8 +3939,7 @@ struct llama_model_loader {
 
                     size_t bytes_read = 0;
 
-                    while (bytes_read < n_size)
-                    {
+                    while (bytes_read < n_size) {
                         size_t read_iteration = std::min(buffer_size, n_size - bytes_read);
 
                         ggml_backend_event_synchronize(events[buffer_idx]);
@@ -3982,7 +3981,6 @@ struct llama_model_loader {
         }
 #endif
 
-
         // check validation results
         bool validation_failed = false;
         for (auto & future : validation_result) {
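
For context (not part of the patch): the hunks above touch llama.cpp's chunked, double-buffered upload path, where each file chunk is read into one of several pinned staging buffers, uploaded asynchronously, and the buffer is only reused once its previous upload has completed. The following is a minimal, self-contained C++ sketch of that round-robin staging pattern under stated assumptions: std::async and the simulate_upload() helper stand in for the CUDA copy and the ggml backend events, and all names and sizes here are illustrative, not llama.cpp or ggml API.

// Round-robin staging sketch: read in fixed-size chunks, upload asynchronously,
// and wait on a buffer's previous upload before overwriting it.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <future>
#include <vector>

// Hypothetical stand-in for the asynchronous device upload
// (ggml_backend_tensor_set_async() plays this role in the patch).
static void simulate_upload(const std::vector<char> & staging, size_t dst_offset, size_t size) {
    (void)staging;
    std::printf("upload %zu bytes at offset %zu\n", size, dst_offset);
}

int main() {
    constexpr size_t n_buffers   = 4;              // number of staging buffers
    constexpr size_t buffer_size = 1024;           // chunk size per staging buffer
    const     size_t n_size      = 10 * 1024 + 37; // total bytes of the "tensor"

    std::vector<std::vector<char>> staging(n_buffers, std::vector<char>(buffer_size));
    std::vector<std::future<void>> in_flight(n_buffers); // plays the role of the events

    size_t bytes_read = 0;
    size_t buffer_idx = 0;

    while (bytes_read < n_size) {
        const size_t read_iteration = std::min(buffer_size, n_size - bytes_read);

        // Wait until the previous upload from this staging buffer has finished
        // (ggml_backend_event_synchronize() in the patch) before reusing it.
        if (in_flight[buffer_idx].valid()) {
            in_flight[buffer_idx].wait();
        }

        // Stand-in for file->read_raw(): fill the staging buffer from disk.
        std::fill(staging[buffer_idx].begin(),
                  staging[buffer_idx].begin() + read_iteration, 'x');

        // Start the asynchronous upload and remember it so the buffer is not
        // overwritten too early (ggml_backend_event_record() in the patch).
        in_flight[buffer_idx] = std::async(std::launch::async, simulate_upload,
                                           std::cref(staging[buffer_idx]),
                                           bytes_read, read_iteration);

        bytes_read += read_iteration;
        buffer_idx  = (buffer_idx + 1) % n_buffers;
    }

    // Drain all outstanding uploads before the staging buffers go away.
    for (auto & f : in_flight) {
        if (f.valid()) {
            f.wait();
        }
    }
}

The patch's change to gate this path on !use_mmap && !check_tensors follows the same logic: tensor validation reads the data back after loading, which does not mix with uploads that are still in flight, so the synchronous path is kept when check_tensors is enabled.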