From 8f84210df89c2b9665e395c9bbc5a3a7f9e8855e Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Fri, 13 Sep 2024 12:09:43 +0300
Subject: [PATCH] log : add comments + adjust defaults

ggml-ci
---
 common/arg.cpp                       | 24 +++++-------------
 common/log.cpp                       | 25 +++++++++++++-----
 common/log.h                         | 22 ++++++++++++----
 examples/export-lora/export-lora.cpp |  2 +-
 examples/imatrix/imatrix.cpp         | 38 ++++++++++++----------------
 tests/test-arg-parser.cpp            |  2 +-
 tests/test-log.cpp                   |  5 ++--
 7 files changed, 63 insertions(+), 55 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index 49f47e782..ca1534c98 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -385,20 +385,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             exit(0);
         }
     ));
-    add_opt(llama_arg(
-        {"-v", "--verbose"},
-        "print verbose information",
-        [](gpt_params & params) {
-            params.verbosity = 1;
-        }
-    ));
-    add_opt(llama_arg(
-        {"--verbosity"}, "N",
-        format("set specific verbosity level (default: %d)", params.verbosity),
-        [](gpt_params & params, int value) {
-            params.verbosity = value;
-        }
-    ));
     add_opt(llama_arg(
         {"--verbose-prompt"},
         format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"),
@@ -1967,16 +1953,18 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
         }
     ).set_env("LLAMA_LOG_COLORS"));
     add_opt(llama_arg(
-        {"-lv", "--log-verbose"},
+        {"-v", "--verbose", "--log-verbose"},
         "Set verbosity level to infinity (i.e. log all messages, useful for debugging)",
-        [](gpt_params &) {
+        [](gpt_params & params) {
+            params.verbosity = INT_MAX;
             gpt_log_set_verbosity_thold(INT_MAX);
         }
     ));
     add_opt(llama_arg(
-        {"--log-verbosity"}, "THOLD",
+        {"-lv", "--verbosity", "--log-verbosity"}, "THOLD",
         "Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
-        [](gpt_params &, int value) {
+        [](gpt_params & params, int value) {
+            params.verbosity = value;
             gpt_log_set_verbosity_thold(value);
         }
     ).set_env("LLAMA_LOG_VERBOSITY"));
diff --git a/common/log.cpp b/common/log.cpp
index 49bab6a6a..99ff6bba2 100644
--- a/common/log.cpp
+++ b/common/log.cpp
@@ -113,15 +113,21 @@ struct gpt_log_entry {
 };
 
 struct gpt_log {
+    // default capacity - will be expanded if needed
+    gpt_log() : gpt_log(256) {}
+
     gpt_log(size_t capacity) {
         file = nullptr;
         timestamps = false;
         running = false;
         t_start = t_us();
+
+        // initial message size - will be expanded if longer messages arrive
         entries.resize(capacity);
         for (auto & entry : entries) {
             entry.msg.resize(256);
         }
+
         head = 0;
         tail = 0;
 
@@ -160,6 +166,7 @@ public:
         std::lock_guard<std::mutex> lock(mtx);
 
         if (!running) {
+            // discard messages while the worker thread is paused
             return;
         }
 
@@ -274,11 +281,13 @@ public:
 
             running = false;
 
-            auto & entry = entries[tail];
+            // push an entry to signal the worker thread to stop
+            {
+                auto & entry = entries[tail];
+                entry.is_end = true;
 
-            entry.is_end = true;
-
-            tail = (tail + 1) % entries.size();
+                tail = (tail + 1) % entries.size();
+            }
 
             cv.notify_one();
         }
@@ -331,12 +340,16 @@ public:
     }
 };
 
+//
+// public API
+//
+
 struct gpt_log * gpt_log_init() {
-    return new gpt_log{256};
+    return new gpt_log;
 }
 
 struct gpt_log * gpt_log_main() {
-    static struct gpt_log log{256};
+    static struct gpt_log log;
 
     return &log;
 }
diff --git a/common/log.h b/common/log.h
index 9e5775449..f00fae702 100644
--- a/common/log.h
+++ b/common/log.h
@@ -10,8 +10,8 @@
 #    define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
 #endif
 
-#define LOG_DEFAULT_DEBUG 10
-#define LOG_DEFAULT_LLAMA 5
+#define LOG_DEFAULT_DEBUG 1
+#define LOG_DEFAULT_LLAMA 0
 
 // needed by the LOG_TMPL macro to avoid computing log arguments if the verbosity lower
 // set via gpt_log_set_verbosity()
@@ -19,12 +19,14 @@ extern int gpt_log_verbosity_thold;
 
 void gpt_log_set_verbosity_thold(int verbosity); // not thread-safe
 
+// the gpt_log uses an internal worker thread to print/write log messages
+// when the worker thread is paused, incoming log messages are discarded
 struct gpt_log;
 
 struct gpt_log * gpt_log_init();
-struct gpt_log * gpt_log_main();
-void             gpt_log_pause (struct gpt_log * log);
-void             gpt_log_resume(struct gpt_log * log);
+struct gpt_log * gpt_log_main(); // singleton, automatically destroys itself on exit
+void             gpt_log_pause (struct gpt_log * log); // pause  the worker thread, not thread-safe
+void             gpt_log_resume(struct gpt_log * log); // resume the worker thread, not thread-safe
 void             gpt_log_free  (struct gpt_log * log);
 
 LOG_ATTRIBUTE_FORMAT(3, 4)
@@ -34,6 +36,16 @@ void gpt_log_set_file      (struct gpt_log * log, const char * file);       // n
 void gpt_log_set_colors    (struct gpt_log * log,       bool   colors);     // not thread-safe
 void gpt_log_set_timestamps(struct gpt_log * log,       bool   timestamps);
 
+// helper macros for logging
+// use these to avoid computing log arguments if the verbosity is lower than the threshold
+//
+// for example:
+//
+//   LOG_DBG("this is a debug message: %d\n", expensive_function());
+//
+// this will avoid calling expensive_function() if the verbosity is lower than LOG_DEFAULT_DEBUG
+//
+
 #define LOG_TMPL(level, verbosity, ...) \
     do { \
         if ((verbosity) <= gpt_log_verbosity_thold) { \
diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp
index 90126ad1e..0051a5eb6 100644
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -406,7 +406,7 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
-    g_verbose = (params.verbosity == 1);
+    g_verbose = (params.verbosity > 1);
     try {
         lora_merge_ctx ctx(params.model, params.lora_adapters, params.lora_outfile, params.cpuparams.n_threads);
         ctx.run_merge();
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 55d17b228..265281699 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -22,7 +22,7 @@
 static void print_usage(int, char ** argv) {
     LOG("\nexample usage:\n");
     LOG("\n    %s \\\n"
-            "       -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] [--verbosity 1] \\\n"
+            "       -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n"
             "       [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
             "       [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]);
     LOG("\n");
@@ -129,9 +129,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
             LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as);
             exit(1); //GGML_ABORT("fatal error");
         }
-        if (m_params.verbosity > 1) {
-            printf("%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[2], (int)src1->type);
-        }
+        LOG_DBGV(2, "%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[2], (int)src1->type);
         // loop over all possible experts, regardless if they are used or not in the batch
         for (int ex = 0; ex < n_as; ++ex) {
             size_t e_start = ex*src1->ne[0];
@@ -180,9 +178,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
             exit(1); //GGML_ABORT("fatal error");
         }
         ++e.ncall;
-        if (m_params.verbosity > 1) {
-            LOG("%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[1], (int)src1->type);
-        }
+        LOG_DBGV(2, "%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[1], (int)src1->type);
         for (int row = 0; row < (int)src1->ne[1]; ++row) {
             const float * x = data + row * src1->ne[0];
             for (int j = 0; j < (int)src1->ne[0]; ++j) {
@@ -292,21 +288,20 @@ void IMatrixCollector::save_imatrix(int ncall) const {
         out.write(m_params.prompt_file.c_str(), len);
     }
 
-    if (m_params.verbosity > 0) {
-        LOG_INF("\n%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str());
-    }
+    LOGV(1, "\n");
+    LOG_DBGV(1, "%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str());
 }
 
 bool IMatrixCollector::load_imatrix(const char * fname) {
     std::ifstream in(fname, std::ios::binary);
     if (!in) {
-        printf("%s: failed to open %s\n",__func__, fname);
+        LOG_ERR("%s: failed to open %s\n",__func__, fname);
         return false;
     }
     int n_entries;
     in.read((char*)&n_entries, sizeof(n_entries));
     if (in.fail() || n_entries < 1) {
-        printf("%s: no data in file %s\n", __func__, fname);
+        LOG_ERR("%s: no data in file %s\n", __func__, fname);
         return false;
     }
     for (int i = 0; i < n_entries; ++i) {
@@ -314,7 +309,7 @@ bool IMatrixCollector::load_imatrix(const char * fname) {
         std::vector<char> name_as_vec(len+1);
         in.read((char *)name_as_vec.data(), len);
         if (in.fail()) {
-            printf("%s: failed reading name for entry %d from %s\n",__func__,i+1, fname);
+            LOG_ERR("%s: failed reading name for entry %d from %s\n",__func__,i+1, fname);
             return false;
         }
         name_as_vec[len] = 0;
@@ -325,7 +320,7 @@ bool IMatrixCollector::load_imatrix(const char * fname) {
         int nval;
         in.read((char *)&nval, sizeof(nval));
         if (in.fail() || nval < 1) {
-            printf("%s: failed reading number of values for entry %d\n",__func__,i);
+            LOG_ERR("%s: failed reading number of values for entry %d\n",__func__,i);
             m_stats = {};
             return false;
         }
@@ -338,7 +333,7 @@ bool IMatrixCollector::load_imatrix(const char * fname) {
         std::vector<float> tmp(nval);
         in.read((char*)tmp.data(), nval*sizeof(float));
         if (in.fail()) {
-            printf("%s: failed reading data for entry %d\n",__func__,i);
+            LOG_ERR("%s: failed reading data for entry %d\n",__func__,i);
             m_stats = {};
             return false;
         }
@@ -548,13 +543,13 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
                     workers, nll, nll2, logit_history.data() + start + first, prob_history.data() + start + first);
             count += n_ctx - first - 1;
 
-            printf("[%d]%.4lf,", i + 1, std::exp(nll / count));
+            LOG("[%d]%.4lf,", i + 1, std::exp(nll / count));
             fflush(stdout);
 
             logits.clear();
         }
     }
-    printf("\n");
+    LOG("\n");
 
     if (params.compute_ppl) {
         nll2 /= count;
@@ -563,9 +558,9 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
         nll2 -= nll * nll;
         if (nll2 > 0) {
             nll2 = sqrt(nll2/(count-1));
-            printf("Final estimate: PPL = %.4lf +/- %.5lf\n", ppl, nll2*ppl);
+            LOG("Final estimate: PPL = %.4lf +/- %.5lf\n", ppl, nll2*ppl);
         } else {
-            printf("Unexpected negative standard deviation of log(prob)\n");
+            LOG("Unexpected negative standard deviation of log(prob)\n");
         }
     }
 
@@ -577,7 +572,6 @@ int main(int argc, char ** argv) {
 
     params.n_ctx = 512;
     params.logits_all = true;
-    params.verbosity = 1;
 
     if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_IMATRIX, print_usage)) {
         return 1;
@@ -590,7 +584,7 @@ int main(int argc, char ** argv) {
     g_collector.set_params(params);
 
     for (const auto & in_file : params.in_files) {
-        printf("%s : loading imatrix from '%s'\n", __func__, in_file.c_str());
+        LOG_INF("%s : loading imatrix from '%s'\n", __func__, in_file.c_str());
         if (!g_collector.load_imatrix(in_file.c_str())) {
             LOG_ERR("%s : failed to load %s\n", __func__, in_file.c_str());
             return 1;
@@ -598,7 +592,7 @@ int main(int argc, char ** argv) {
     }
 
     if (params.in_files.size() > 1) {
-        printf("%s : saving combined imatrix to '%s'\n", __func__, params.out_file.c_str());
+        LOG_INF("%s : saving combined imatrix to '%s'\n", __func__, params.out_file.c_str());
         g_collector.save_imatrix();
     }
 
diff --git a/tests/test-arg-parser.cpp b/tests/test-arg-parser.cpp
index f26707910..e07d09733 100644
--- a/tests/test-arg-parser.cpp
+++ b/tests/test-arg-parser.cpp
@@ -85,7 +85,7 @@ int main(void) {
 
     argv = {"binary_name", "--verbose"};
     assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
-    assert(params.verbosity == 1);
+    assert(params.verbosity > 1);
 
     argv = {"binary_name", "-m", "abc.gguf", "--predict", "6789", "--batch-size", "9090"};
     assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
diff --git a/tests/test-log.cpp b/tests/test-log.cpp
index c6eb89e58..06df144ee 100644
--- a/tests/test-log.cpp
+++ b/tests/test-log.cpp
@@ -5,11 +5,12 @@
 
 int main() {
     const int n_thread = 8;
-    const int n_msg = 1000;
 
     std::thread threads[n_thread];
     for (int i = 0; i < n_thread; i++) {
-        threads[i] = std::thread([i, n_msg]() {
+        threads[i] = std::thread([i]() {
+            const int n_msg = 1000;
+
             for (int j = 0; j < n_msg; j++) {
                 const int log_type = std::rand() % 4;