From e66962ff63eb01e80aa121ad9bcf884057f1b075 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ABl=20Kerbiriou?=
Date: Mon, 20 Mar 2023 12:15:01 +0100
Subject: [PATCH] log file for debug output

---
 main.cpp  | 13 +++++++++++++
 utils.cpp |  4 ++++
 utils.h   |  6 ++++++
 3 files changed, 23 insertions(+)

diff --git a/main.cpp b/main.cpp
index cdcdd2fc6..4d6e91826 100644
--- a/main.cpp
+++ b/main.cpp
@@ -803,6 +803,7 @@ const char * llama_print_system_info(void) {
 int main(int argc, char ** argv) {
     ggml_time_init();
     const int64_t t_main_start_us = ggml_time_us();
+    log_file = fopen("out.log", "w");
 
     gpt_params params;
     params.model = "models/llama-7B/ggml-model.bin";
@@ -960,6 +961,15 @@ int main(int argc, char ** argv) {
         if (embd.size() > 0) {
             const int64_t t_start_us = ggml_time_us();
 
+            if(log_file) {
+                std::string intokdbg = vocab.id_to_token.at(embd[0]);
+                for(int i = 1; i < embd.size(); i++) {
+                    intokdbg += '|';
+                    intokdbg += vocab.id_to_token.at(embd[i]);
+                }
+                logprintf("\nin:'%s' n_past=%d, remaining_tokens=%d, embd.size()=%zu, embd_inp.size()=%zu\n",
+                          intokdbg.c_str(), n_past, remaining_tokens, embd.size(), embd_inp.size());
+            }
             if (!llama_eval(model, params.n_threads, n_past, embd, logits, mem_per_token)) {
                 fprintf(stderr, "Failed to predict\n");
                 return 1;
@@ -1079,6 +1089,7 @@ int main(int argc, char ** argv) {
                 }
                 is_interacting = false;
             }
+            if (log_file) fflush(log_file);
 
             // end of text token
             if (embd.back() == EOS_TOKEN_ID) {
@@ -1096,6 +1107,7 @@ int main(int argc, char ** argv) {
             is_interacting = true;
         }
     }
+    logprintf("exit: remaining_tokens=%d n_past=%d goal=%lu n_predict=%d\n", remaining_tokens, n_past, embd_inp.size() + params.n_predict, params.n_predict);
 
 #if defined (_WIN32)
     signal(SIGINT, SIG_DFL);
@@ -1114,6 +1126,7 @@ int main(int argc, char ** argv) {
     }
 
     ggml_free(model.ctx);
+    if (log_file) fclose(log_file);
 
     if (params.use_color) {
         printf(ANSI_COLOR_RESET);
diff --git a/utils.cpp b/utils.cpp
index 88cb39585..c71ffa8e7 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -16,6 +16,8 @@
 #include <alloca.h>
 #endif
 
+FILE * log_file = NULL;
+
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
     // determine sensible default number of threads.
     // std::thread::hardware_concurrency may not be equal to the number of cores, or may return 0.
@@ -576,6 +578,7 @@ struct SoftMaxSampler {
                 break;
             }
         }
+        logprintf("%s: n: %d sum: %f\n", __func__, n, cumsum);
 
         // discrete_distribution renormalizes the subset of probabilities to sum to 1.0
         return std::discrete_distribution<>(probs.begin(), probs.begin() + n);
@@ -633,6 +636,7 @@ gpt_vocab::id sample_top_k_top_p(
 
     auto dist = probs.top_p(top_p);
     int sampled_tok_id = probs.sample(dist, rng);
+    probs.print(log_file, vocab, logits, 16, sampled_tok_id);
 
     return sampled_tok_id;
 }
diff --git a/utils.h b/utils.h
index c1ca74a09..11609f801 100644
--- a/utils.h
+++ b/utils.h
@@ -8,6 +8,12 @@
 #include <random>
 #include <thread>
 
+#include <cstdio>
+
+extern FILE * log_file;
+
+#define logprintf(...) { if (log_file) fprintf(log_file, __VA_ARGS__); }
+
 //
 // CLI argument parsing
 //