From 8f84210df89c2b9665e395c9bbc5a3a7f9e8855e Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 13 Sep 2024 12:09:43 +0300 Subject: [PATCH] log : add comments + adjust defaults ggml-ci --- common/arg.cpp | 24 +++++------------- common/log.cpp | 25 +++++++++++++----- common/log.h | 22 ++++++++++++---- examples/export-lora/export-lora.cpp | 2 +- examples/imatrix/imatrix.cpp | 38 ++++++++++++---------------- tests/test-arg-parser.cpp | 2 +- tests/test-log.cpp | 5 ++-- 7 files changed, 63 insertions(+), 55 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 49f47e782..ca1534c98 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -385,20 +385,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex, exit(0); } )); - add_opt(llama_arg( - {"-v", "--verbose"}, - "print verbose information", - [](gpt_params & params) { - params.verbosity = 1; - } - )); - add_opt(llama_arg( - {"--verbosity"}, "N", - format("set specific verbosity level (default: %d)", params.verbosity), - [](gpt_params & params, int value) { - params.verbosity = value; - } - )); add_opt(llama_arg( {"--verbose-prompt"}, format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"), @@ -1967,16 +1953,18 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex, } ).set_env("LLAMA_LOG_COLORS")); add_opt(llama_arg( - {"-lv", "--log-verbose"}, + {"-v", "--verbose", "--log-verbose"}, "Set verbosity level to infinity (i.e. log all messages, useful for debugging)", - [](gpt_params &) { + [](gpt_params & params) { + params.verbosity = INT_MAX; gpt_log_set_verbosity_thold(INT_MAX); } )); add_opt(llama_arg( - {"--log-verbosity"}, "THOLD", + {"-lv", "--verbosity", "--log-verbosity"}, "THOLD", "Set the verbosity threshold. Messages with a higher verbosity will be ignored.", - [](gpt_params &, int value) { + [](gpt_params & params, int value) { + params.verbosity = value; gpt_log_set_verbosity_thold(value); } ).set_env("LLAMA_LOG_VERBOSITY")); diff --git a/common/log.cpp b/common/log.cpp index 49bab6a6a..99ff6bba2 100644 --- a/common/log.cpp +++ b/common/log.cpp @@ -113,15 +113,21 @@ struct gpt_log_entry { }; struct gpt_log { + // default capacity - will be expanded if needed + gpt_log() : gpt_log(256) {} + gpt_log(size_t capacity) { file = nullptr; timestamps = false; running = false; t_start = t_us(); + + // initial message size - will be expanded if longer messages arrive entries.resize(capacity); for (auto & entry : entries) { entry.msg.resize(256); } + head = 0; tail = 0; @@ -160,6 +166,7 @@ public: std::lock_guard lock(mtx); if (!running) { + // discard messages while the worker thread is paused return; } @@ -274,11 +281,13 @@ public: running = false; - auto & entry = entries[tail]; + // push an entry to signal the worker thread to stop + { + auto & entry = entries[tail]; + entry.is_end = true; - entry.is_end = true; - - tail = (tail + 1) % entries.size(); + tail = (tail + 1) % entries.size(); + } cv.notify_one(); } @@ -331,12 +340,16 @@ public: } }; +// +// public API +// + struct gpt_log * gpt_log_init() { - return new gpt_log{256}; + return new gpt_log; } struct gpt_log * gpt_log_main() { - static struct gpt_log log{256}; + static struct gpt_log log; return &log; } diff --git a/common/log.h b/common/log.h index 9e5775449..f00fae702 100644 --- a/common/log.h +++ b/common/log.h @@ -10,8 +10,8 @@ # define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__))) #endif -#define LOG_DEFAULT_DEBUG 10 -#define LOG_DEFAULT_LLAMA 5 +#define LOG_DEFAULT_DEBUG 1 +#define LOG_DEFAULT_LLAMA 0 // needed by the LOG_TMPL macro to avoid computing log arguments if the verbosity lower // set via gpt_log_set_verbosity() @@ -19,12 +19,14 @@ extern int gpt_log_verbosity_thold; void gpt_log_set_verbosity_thold(int verbosity); // not thread-safe +// the gpt_log uses an internal worker thread to print/write log messages +// when the worker thread is paused, incoming log messages are discarded struct gpt_log; struct gpt_log * gpt_log_init(); -struct gpt_log * gpt_log_main(); -void gpt_log_pause (struct gpt_log * log); -void gpt_log_resume(struct gpt_log * log); +struct gpt_log * gpt_log_main(); // singleton, automatically destroys itself on exit +void gpt_log_pause (struct gpt_log * log); // pause the worker thread, not thread-safe +void gpt_log_resume(struct gpt_log * log); // resume the worker thread, not thread-safe void gpt_log_free (struct gpt_log * log); LOG_ATTRIBUTE_FORMAT(3, 4) @@ -34,6 +36,16 @@ void gpt_log_set_file (struct gpt_log * log, const char * file); // n void gpt_log_set_colors (struct gpt_log * log, bool colors); // not thread-safe void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps); +// helper macros for logging +// use these to avoid computing log arguments if the verbosity is lower than the threshold +// +// for example: +// +// LOG_DBG("this is a debug message: %d\n", expensive_function()); +// +// this will avoid calling expensive_function() if the verbosity is lower than LOG_DEFAULT_DEBUG +// + #define LOG_TMPL(level, verbosity, ...) \ do { \ if ((verbosity) <= gpt_log_verbosity_thold) { \ diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp index 90126ad1e..0051a5eb6 100644 --- a/examples/export-lora/export-lora.cpp +++ b/examples/export-lora/export-lora.cpp @@ -406,7 +406,7 @@ int main(int argc, char ** argv) { return 1; } - g_verbose = (params.verbosity == 1); + g_verbose = (params.verbosity > 1); try { lora_merge_ctx ctx(params.model, params.lora_adapters, params.lora_outfile, params.cpuparams.n_threads); ctx.run_merge(); diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 55d17b228..265281699 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -22,7 +22,7 @@ static void print_usage(int, char ** argv) { LOG("\nexample usage:\n"); LOG("\n %s \\\n" - " -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] [--verbosity 1] \\\n" + " -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n" " [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n" " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]); LOG("\n"); @@ -129,9 +129,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as); exit(1); //GGML_ABORT("fatal error"); } - if (m_params.verbosity > 1) { - printf("%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[2], (int)src1->type); - } + LOG_DBGV(2, "%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[2], (int)src1->type); // loop over all possible experts, regardless if they are used or not in the batch for (int ex = 0; ex < n_as; ++ex) { size_t e_start = ex*src1->ne[0]; @@ -180,9 +178,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * exit(1); //GGML_ABORT("fatal error"); } ++e.ncall; - if (m_params.verbosity > 1) { - LOG("%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[1], (int)src1->type); - } + LOG_DBGV(2, "%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[1], (int)src1->type); for (int row = 0; row < (int)src1->ne[1]; ++row) { const float * x = data + row * src1->ne[0]; for (int j = 0; j < (int)src1->ne[0]; ++j) { @@ -292,21 +288,20 @@ void IMatrixCollector::save_imatrix(int ncall) const { out.write(m_params.prompt_file.c_str(), len); } - if (m_params.verbosity > 0) { - LOG_INF("\n%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str()); - } + LOGV(1, "\n"); + LOG_DBGV(1, "%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str()); } bool IMatrixCollector::load_imatrix(const char * fname) { std::ifstream in(fname, std::ios::binary); if (!in) { - printf("%s: failed to open %s\n",__func__, fname); + LOG_ERR("%s: failed to open %s\n",__func__, fname); return false; } int n_entries; in.read((char*)&n_entries, sizeof(n_entries)); if (in.fail() || n_entries < 1) { - printf("%s: no data in file %s\n", __func__, fname); + LOG_ERR("%s: no data in file %s\n", __func__, fname); return false; } for (int i = 0; i < n_entries; ++i) { @@ -314,7 +309,7 @@ bool IMatrixCollector::load_imatrix(const char * fname) { std::vector name_as_vec(len+1); in.read((char *)name_as_vec.data(), len); if (in.fail()) { - printf("%s: failed reading name for entry %d from %s\n",__func__,i+1, fname); + LOG_ERR("%s: failed reading name for entry %d from %s\n",__func__,i+1, fname); return false; } name_as_vec[len] = 0; @@ -325,7 +320,7 @@ bool IMatrixCollector::load_imatrix(const char * fname) { int nval; in.read((char *)&nval, sizeof(nval)); if (in.fail() || nval < 1) { - printf("%s: failed reading number of values for entry %d\n",__func__,i); + LOG_ERR("%s: failed reading number of values for entry %d\n",__func__,i); m_stats = {}; return false; } @@ -338,7 +333,7 @@ bool IMatrixCollector::load_imatrix(const char * fname) { std::vector tmp(nval); in.read((char*)tmp.data(), nval*sizeof(float)); if (in.fail()) { - printf("%s: failed reading data for entry %d\n",__func__,i); + LOG_ERR("%s: failed reading data for entry %d\n",__func__,i); m_stats = {}; return false; } @@ -548,13 +543,13 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params) { workers, nll, nll2, logit_history.data() + start + first, prob_history.data() + start + first); count += n_ctx - first - 1; - printf("[%d]%.4lf,", i + 1, std::exp(nll / count)); + LOG("[%d]%.4lf,", i + 1, std::exp(nll / count)); fflush(stdout); logits.clear(); } } - printf("\n"); + LOG("\n"); if (params.compute_ppl) { nll2 /= count; @@ -563,9 +558,9 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params) { nll2 -= nll * nll; if (nll2 > 0) { nll2 = sqrt(nll2/(count-1)); - printf("Final estimate: PPL = %.4lf +/- %.5lf\n", ppl, nll2*ppl); + LOG("Final estimate: PPL = %.4lf +/- %.5lf\n", ppl, nll2*ppl); } else { - printf("Unexpected negative standard deviation of log(prob)\n"); + LOG("Unexpected negative standard deviation of log(prob)\n"); } } @@ -577,7 +572,6 @@ int main(int argc, char ** argv) { params.n_ctx = 512; params.logits_all = true; - params.verbosity = 1; if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_IMATRIX, print_usage)) { return 1; @@ -590,7 +584,7 @@ int main(int argc, char ** argv) { g_collector.set_params(params); for (const auto & in_file : params.in_files) { - printf("%s : loading imatrix from '%s'\n", __func__, in_file.c_str()); + LOG_INF("%s : loading imatrix from '%s'\n", __func__, in_file.c_str()); if (!g_collector.load_imatrix(in_file.c_str())) { LOG_ERR("%s : failed to load %s\n", __func__, in_file.c_str()); return 1; @@ -598,7 +592,7 @@ int main(int argc, char ** argv) { } if (params.in_files.size() > 1) { - printf("%s : saving combined imatrix to '%s'\n", __func__, params.out_file.c_str()); + LOG_INF("%s : saving combined imatrix to '%s'\n", __func__, params.out_file.c_str()); g_collector.save_imatrix(); } diff --git a/tests/test-arg-parser.cpp b/tests/test-arg-parser.cpp index f26707910..e07d09733 100644 --- a/tests/test-arg-parser.cpp +++ b/tests/test-arg-parser.cpp @@ -85,7 +85,7 @@ int main(void) { argv = {"binary_name", "--verbose"}; assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON)); - assert(params.verbosity == 1); + assert(params.verbosity > 1); argv = {"binary_name", "-m", "abc.gguf", "--predict", "6789", "--batch-size", "9090"}; assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON)); diff --git a/tests/test-log.cpp b/tests/test-log.cpp index c6eb89e58..06df144ee 100644 --- a/tests/test-log.cpp +++ b/tests/test-log.cpp @@ -5,11 +5,12 @@ int main() { const int n_thread = 8; - const int n_msg = 1000; std::thread threads[n_thread]; for (int i = 0; i < n_thread; i++) { - threads[i] = std::thread([i, n_msg]() { + threads[i] = std::thread([i]() { + const int n_msg = 1000; + for (int j = 0; j < n_msg; j++) { const int log_type = std::rand() % 4;