From c39d5ecd2bd88e5b2e660eb56994b38c1edc666c Mon Sep 17 00:00:00 2001
From: Markus Tavenrath
Date: Thu, 13 Jun 2024 15:55:23 +0200
Subject: [PATCH] Apply suggestions from code review

Co-authored-by: slaren
---
 llama.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index a6eb79c99..df4944595 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3851,7 +3851,7 @@ struct llama_model_loader {
         size_t buffer_idx = 0; // buffer to use for async loads
 
         ggml_backend_t cuda_backend = nullptr;
-        if (!use_mmap) {
+        if (!use_mmap && !check_tensors) {
             // When not using mmaped io use async uploads from pinned memory to GPU memory.
             // First determine if the CUDA backend is active, and if so, determine the device ID.
             ggml_backend_buffer_t buf = bufs_mmap.count(0) ? bufs_mmap.at(0) : nullptr;
@@ -3939,8 +3939,7 @@ struct llama_model_loader {
 
                     size_t bytes_read = 0;
 
-                    while (bytes_read < n_size)
-                    {
+                    while (bytes_read < n_size) {
                         size_t read_iteration = std::min(buffer_size, n_size - bytes_read);
 
                         ggml_backend_event_synchronize(events[buffer_idx]);
@@ -3982,7 +3981,6 @@ struct llama_model_loader {
         }
 #endif
 
-
         // check validation results
         bool validation_failed = false;
         for (auto & future : validation_result) {
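
For context (not part of the patch): the hunks above touch llama.cpp's chunked, double-buffered upload path, where each file chunk is read into one of several pinned staging buffers, uploaded asynchronously, and the buffer is only reused once its previous upload has completed. The following is a minimal, self-contained C++ sketch of that round-robin staging pattern under stated assumptions: std::async and the simulate_upload() helper stand in for the CUDA copy and the ggml backend events, and all names and sizes here are illustrative, not llama.cpp or ggml API.

// Round-robin staging sketch: read in fixed-size chunks, upload asynchronously,
// and wait on a buffer's previous upload before overwriting it.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <future>
#include <vector>

// Hypothetical stand-in for the asynchronous device upload
// (ggml_backend_tensor_set_async() plays this role in the patch).
static void simulate_upload(const std::vector<char> & staging, size_t dst_offset, size_t size) {
    (void)staging;
    std::printf("upload %zu bytes at offset %zu\n", size, dst_offset);
}

int main() {
    constexpr size_t n_buffers   = 4;              // number of staging buffers
    constexpr size_t buffer_size = 1024;           // chunk size per staging buffer
    const     size_t n_size      = 10 * 1024 + 37; // total bytes of the "tensor"

    std::vector<std::vector<char>> staging(n_buffers, std::vector<char>(buffer_size));
    std::vector<std::future<void>> in_flight(n_buffers); // plays the role of the events

    size_t bytes_read = 0;
    size_t buffer_idx = 0;

    while (bytes_read < n_size) {
        const size_t read_iteration = std::min(buffer_size, n_size - bytes_read);

        // Wait until the previous upload from this staging buffer has finished
        // (ggml_backend_event_synchronize() in the patch) before reusing it.
        if (in_flight[buffer_idx].valid()) {
            in_flight[buffer_idx].wait();
        }

        // Stand-in for file->read_raw(): fill the staging buffer from disk.
        std::fill(staging[buffer_idx].begin(),
                  staging[buffer_idx].begin() + read_iteration, 'x');

        // Start the asynchronous upload and remember it so the buffer is not
        // overwritten too early (ggml_backend_event_record() in the patch).
        in_flight[buffer_idx] = std::async(std::launch::async, simulate_upload,
                                           std::cref(staging[buffer_idx]),
                                           bytes_read, read_iteration);

        bytes_read += read_iteration;
        buffer_idx  = (buffer_idx + 1) % n_buffers;
    }

    // Drain all outstanding uploads before the staging buffers go away.
    for (auto & f : in_flight) {
        if (f.valid()) {
            f.wait();
        }
    }
}

The patch's change to gate this path on !use_mmap && !check_tensors follows the same logic: tensor validation reads the data back after loading, which does not mix with uploads that are still in flight, so the synchronous path is kept when check_tensors is enabled.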