From 853b3d716dd08c3417d2ad5e8d6f6b03d1ff2cae Mon Sep 17 00:00:00 2001
From: ngxson
Date: Fri, 1 Mar 2024 22:07:50 +0100
Subject: [PATCH] merge: debug

---
 llama.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index d4001fd58..b446aef26 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11492,6 +11492,7 @@ int32_t llama_merge_models(const struct llama_merge_config * config) {
     };

     // process function, to be run as thread
+    // TODO: multi-threading here is done for each tensor (instead of for each row, as in llama_model_quantize_internal); this is not ideal, but still better than single-threaded
     const size_t n_start = n_curr;
     auto process_output_tensor = [&]() {
         worker_acquire();
@@ -11570,7 +11571,8 @@ int32_t llama_merge_models(const struct llama_merge_config * config) {
                 return n_done == my_number;
             });
         }
-        LLAMA_LOG_ERROR("===> %f %f %f\n", f32_out_buf[0], f32_out_buf[1], f32_out_buf[2]);
+        LLAMA_LOG_ERROR("===> INPUT  [layer %d] %f %f %f\n", i_layer_out, f32_in_buf[0].value, f32_in_buf[1].value, f32_in_buf[2].value);
+        LLAMA_LOG_ERROR("===> OUTPUT [layer %d] %f %f %f\n", i_layer_out, f32_out_buf[0], f32_out_buf[1], f32_out_buf[2]);
         // my turn, write the result!
         // write tensor data + padding
         fout.write((const char *) out_buf.data(), out_buf.size());
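
Note on the TODO above: the granularity question is about what each worker thread claims as a unit of work. Here a thread processes one whole output tensor at a time, whereas llama_model_quantize_internal splits the rows of a single tensor across threads. Below is a minimal, self-contained sketch of the per-tensor scheme only; merge_tensor, n_tensors and n_threads are illustrative placeholders rather than the actual llama.cpp API, and the sketch omits the ordered-write coordination the patch does via worker_acquire() and the n_done == my_number wait.

// Per-tensor work distribution, sketched with std::thread and an atomic counter.
// Each thread repeatedly claims the next unprocessed tensor index and merges
// that whole tensor; no tensor is split across threads.
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

// placeholder for the real per-tensor merge work (read inputs, blend, requantize)
static void merge_tensor(int i_tensor) {
    std::printf("merged tensor %d\n", i_tensor);
}

int main() {
    const int n_tensors = 16; // illustrative count
    const int n_threads = 4;  // illustrative count

    std::atomic<int> next_tensor{0};
    std::vector<std::thread> workers;
    workers.reserve(n_threads);

    for (int t = 0; t < n_threads; ++t) {
        workers.emplace_back([&]() {
            for (int i; (i = next_tensor.fetch_add(1)) < n_tensors; ) {
                merge_tensor(i); // one whole tensor per task, not one row
            }
        });
    }
    for (auto & w : workers) {
        w.join();
    }
    return 0;
}

The per-row split used by llama_model_quantize_internal keeps all threads busy even when a model has a few very large tensors, which is why the comment calls the per-tensor approach "not ideal".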