Apply suggestions from code review

Co-authored-by: slaren <slarengh@gmail.com>
author  Markus Tavenrath, 2024-06-13 15:55:23 +02:00 (committed by GitHub)
parent 86869fbdab
commit c39d5ecd2b

@@ -3851,7 +3851,7 @@ struct llama_model_loader {
         size_t buffer_idx = 0; // buffer to use for async loads
 
         ggml_backend_t cuda_backend = nullptr;
-        if (!use_mmap) {
+        if (!use_mmap && !check_tensors) {
             // When not using mmaped io use async uploads from pinned memory to GPU memory.
             // First determine if the CUDA backend is active, and if so, determine the device ID.
             ggml_backend_buffer_t buf = bufs_mmap.count(0) ? bufs_mmap.at(0) : nullptr;
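
For context, the branch tightened here decides whether the loader may stream tensor data straight to the GPU: it checks whether the destination buffer belongs to a CUDA device and, if so, sets up pinned host staging buffers and synchronization events. Adding !check_tensors skips this fast path when tensor validation is requested, presumably because validation needs the data on the host as well. Below is a minimal sketch of that setup, assuming the ggml-backend CUDA API as it existed around this commit; the helper name try_init_cuda_async_upload and the use of ggml_backend_cuda_host_buffer_type() for pinned memory are illustrative, not taken from the actual patch.

// Sketch only: mirrors the async-upload setup this hunk guards.
// try_init_cuda_async_upload is a hypothetical helper name.
#include <vector>

#include "ggml-backend.h"
#include "ggml-cuda.h"

static ggml_backend_t try_init_cuda_async_upload(
        ggml_backend_buffer_t buf, bool use_mmap, bool check_tensors,
        size_t n_buffers, size_t buffer_size,
        std::vector<ggml_backend_buffer_t> & host_buffers,
        std::vector<void *> & host_ptrs,
        std::vector<ggml_backend_event_t> & events) {
    if (use_mmap || check_tensors || buf == nullptr) {
        return nullptr; // fast path only applies to non-mmap, non-validating loads
    }
    // Determine whether the destination buffer lives on a CUDA device.
    ggml_backend_t cuda_backend = nullptr;
    ggml_backend_buffer_type_t buft = ggml_backend_buffer_get_type(buf);
    for (int i = 0; i < ggml_backend_cuda_get_device_count(); ++i) {
        if (buft == ggml_backend_cuda_buffer_type(i)) {
            cuda_backend = ggml_backend_cuda_init(i); // backend bound to the owning device
            break;
        }
    }
    if (cuda_backend) {
        // Pinned host staging buffers plus one event per buffer, so that file
        // reads can overlap with uploads to the device.
        for (size_t idx = 0; idx < n_buffers; ++idx) {
            host_buffers.push_back(ggml_backend_buft_alloc_buffer(ggml_backend_cuda_host_buffer_type(), buffer_size));
            host_ptrs.push_back(ggml_backend_buffer_get_base(host_buffers[idx]));
            events.push_back(ggml_backend_event_new(cuda_backend));
        }
    }
    return cuda_backend;
}
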
@@ -3939,8 +3939,7 @@ struct llama_model_loader {
                    size_t bytes_read = 0;
-                    while (bytes_read < n_size)
-                    {
+                    while (bytes_read < n_size) {
                        size_t read_iteration = std::min<size_t>(buffer_size, n_size - bytes_read);
                        ggml_backend_event_synchronize(events[buffer_idx]);
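
The loop reformatted here is the heart of the async path: each iteration waits on the event of the current staging buffer (so its previous upload has finished), reads the next chunk of the tensor from the file into that pinned buffer, queues a non-blocking copy into the destination tensor, records a fresh event, and rotates to the next buffer. A sketch of the full rotation, using identifiers from the surrounding loader code (file, cur, host_ptrs, events, n_buffers, buffer_size) and the ggml-backend event API as it existed at this commit:

// Sketch of the double-buffered upload loop around this hunk; cur is the
// destination tensor and n_size its total byte size. std::min needs <algorithm>.
size_t bytes_read = 0;
while (bytes_read < n_size) {
    size_t read_iteration = std::min<size_t>(buffer_size, n_size - bytes_read);

    // Reuse a staging buffer only after its previous upload has completed.
    ggml_backend_event_synchronize(events[buffer_idx]);
    file->read_raw(host_ptrs[buffer_idx], read_iteration);

    // Queue a non-blocking host-to-device copy, then mark its completion point.
    ggml_backend_tensor_set_async(cuda_backend, cur, host_ptrs[buffer_idx], bytes_read, read_iteration);
    ggml_backend_event_record(events[buffer_idx]);

    bytes_read += read_iteration;
    buffer_idx = (buffer_idx + 1) % n_buffers; // rotate to the next staging buffer
}
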
@@ -3982,7 +3981,6 @@ struct llama_model_loader {
         }
 #endif
 
-
         // check validation results
         bool validation_failed = false;
         for (auto & future : validation_result) {
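
For reference, validation_result here is a vector of futures, one per tensor whose data is being checked in the background; the loop starting in this hunk drains them and fails the load if any check came back negative. A sketch of that drain, assuming the element type is std::future<std::pair<ggml_tensor *, bool>> and that a failed check should abort the load; neither detail is taken from the patch itself.

// Sketch: drain the per-tensor validation futures collected earlier.
// Requires <future>, <utility>, <cstdio>, <stdexcept>.
bool validation_failed = false;
for (auto & future : validation_result) {
    std::pair<ggml_tensor *, bool> result = future.get(); // blocks until the check finishes
    if (!result.second) {
        fprintf(stderr, "tensor '%s' has invalid data\n", ggml_get_name(result.first));
        validation_failed = true;
    }
}
if (validation_failed) {
    throw std::runtime_error("found tensors with invalid data");
}
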