log : add comments + adjust defaults
ggml-ci
This commit is contained in:
parent
2afe0a0c7d
commit
8f84210df8
7 changed files with 63 additions and 55 deletions
|
@ -385,20 +385,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
));
|
));
|
||||||
add_opt(llama_arg(
|
|
||||||
{"-v", "--verbose"},
|
|
||||||
"print verbose information",
|
|
||||||
[](gpt_params & params) {
|
|
||||||
params.verbosity = 1;
|
|
||||||
}
|
|
||||||
));
|
|
||||||
add_opt(llama_arg(
|
|
||||||
{"--verbosity"}, "N",
|
|
||||||
format("set specific verbosity level (default: %d)", params.verbosity),
|
|
||||||
[](gpt_params & params, int value) {
|
|
||||||
params.verbosity = value;
|
|
||||||
}
|
|
||||||
));
|
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--verbose-prompt"},
|
{"--verbose-prompt"},
|
||||||
format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"),
|
format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"),
|
||||||
|
@ -1967,16 +1953,18 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
}
|
}
|
||||||
).set_env("LLAMA_LOG_COLORS"));
|
).set_env("LLAMA_LOG_COLORS"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-lv", "--log-verbose"},
|
{"-v", "--verbose", "--log-verbose"},
|
||||||
"Set verbosity level to infinity (i.e. log all messages, useful for debugging)",
|
"Set verbosity level to infinity (i.e. log all messages, useful for debugging)",
|
||||||
[](gpt_params &) {
|
[](gpt_params & params) {
|
||||||
|
params.verbosity = INT_MAX;
|
||||||
gpt_log_set_verbosity_thold(INT_MAX);
|
gpt_log_set_verbosity_thold(INT_MAX);
|
||||||
}
|
}
|
||||||
));
|
));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"--log-verbosity"}, "THOLD",
|
{"-lv", "--verbosity", "--log-verbosity"}, "THOLD",
|
||||||
"Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
|
"Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
|
||||||
[](gpt_params &, int value) {
|
[](gpt_params & params, int value) {
|
||||||
|
params.verbosity = value;
|
||||||
gpt_log_set_verbosity_thold(value);
|
gpt_log_set_verbosity_thold(value);
|
||||||
}
|
}
|
||||||
).set_env("LLAMA_LOG_VERBOSITY"));
|
).set_env("LLAMA_LOG_VERBOSITY"));
|
||||||
|
|
|
@ -113,15 +113,21 @@ struct gpt_log_entry {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct gpt_log {
|
struct gpt_log {
|
||||||
|
// default capacity - will be expanded if needed
|
||||||
|
gpt_log() : gpt_log(256) {}
|
||||||
|
|
||||||
gpt_log(size_t capacity) {
|
gpt_log(size_t capacity) {
|
||||||
file = nullptr;
|
file = nullptr;
|
||||||
timestamps = false;
|
timestamps = false;
|
||||||
running = false;
|
running = false;
|
||||||
t_start = t_us();
|
t_start = t_us();
|
||||||
|
|
||||||
|
// initial message size - will be expanded if longer messages arrive
|
||||||
entries.resize(capacity);
|
entries.resize(capacity);
|
||||||
for (auto & entry : entries) {
|
for (auto & entry : entries) {
|
||||||
entry.msg.resize(256);
|
entry.msg.resize(256);
|
||||||
}
|
}
|
||||||
|
|
||||||
head = 0;
|
head = 0;
|
||||||
tail = 0;
|
tail = 0;
|
||||||
|
|
||||||
|
@ -160,6 +166,7 @@ public:
|
||||||
std::lock_guard<std::mutex> lock(mtx);
|
std::lock_guard<std::mutex> lock(mtx);
|
||||||
|
|
||||||
if (!running) {
|
if (!running) {
|
||||||
|
// discard messages while the worker thread is paused
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -274,11 +281,13 @@ public:
|
||||||
|
|
||||||
running = false;
|
running = false;
|
||||||
|
|
||||||
|
// push an entry to signal the worker thread to stop
|
||||||
|
{
|
||||||
auto & entry = entries[tail];
|
auto & entry = entries[tail];
|
||||||
|
|
||||||
entry.is_end = true;
|
entry.is_end = true;
|
||||||
|
|
||||||
tail = (tail + 1) % entries.size();
|
tail = (tail + 1) % entries.size();
|
||||||
|
}
|
||||||
|
|
||||||
cv.notify_one();
|
cv.notify_one();
|
||||||
}
|
}
|
||||||
|
@ -331,12 +340,16 @@ public:
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//
|
||||||
|
// public API
|
||||||
|
//
|
||||||
|
|
||||||
struct gpt_log * gpt_log_init() {
|
struct gpt_log * gpt_log_init() {
|
||||||
return new gpt_log{256};
|
return new gpt_log;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct gpt_log * gpt_log_main() {
|
struct gpt_log * gpt_log_main() {
|
||||||
static struct gpt_log log{256};
|
static struct gpt_log log;
|
||||||
|
|
||||||
return &log;
|
return &log;
|
||||||
}
|
}
|
||||||
|
|
22
common/log.h
22
common/log.h
|
@ -10,8 +10,8 @@
|
||||||
# define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
# define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define LOG_DEFAULT_DEBUG 10
|
#define LOG_DEFAULT_DEBUG 1
|
||||||
#define LOG_DEFAULT_LLAMA 5
|
#define LOG_DEFAULT_LLAMA 0
|
||||||
|
|
||||||
// needed by the LOG_TMPL macro to avoid computing log arguments if the verbosity lower
|
// needed by the LOG_TMPL macro to avoid computing log arguments if the verbosity lower
|
||||||
// set via gpt_log_set_verbosity()
|
// set via gpt_log_set_verbosity()
|
||||||
|
@ -19,12 +19,14 @@ extern int gpt_log_verbosity_thold;
|
||||||
|
|
||||||
void gpt_log_set_verbosity_thold(int verbosity); // not thread-safe
|
void gpt_log_set_verbosity_thold(int verbosity); // not thread-safe
|
||||||
|
|
||||||
|
// the gpt_log uses an internal worker thread to print/write log messages
|
||||||
|
// when the worker thread is paused, incoming log messages are discarded
|
||||||
struct gpt_log;
|
struct gpt_log;
|
||||||
|
|
||||||
struct gpt_log * gpt_log_init();
|
struct gpt_log * gpt_log_init();
|
||||||
struct gpt_log * gpt_log_main();
|
struct gpt_log * gpt_log_main(); // singleton, automatically destroys itself on exit
|
||||||
void gpt_log_pause (struct gpt_log * log);
|
void gpt_log_pause (struct gpt_log * log); // pause the worker thread, not thread-safe
|
||||||
void gpt_log_resume(struct gpt_log * log);
|
void gpt_log_resume(struct gpt_log * log); // resume the worker thread, not thread-safe
|
||||||
void gpt_log_free (struct gpt_log * log);
|
void gpt_log_free (struct gpt_log * log);
|
||||||
|
|
||||||
LOG_ATTRIBUTE_FORMAT(3, 4)
|
LOG_ATTRIBUTE_FORMAT(3, 4)
|
||||||
|
@ -34,6 +36,16 @@ void gpt_log_set_file (struct gpt_log * log, const char * file); // n
|
||||||
void gpt_log_set_colors (struct gpt_log * log, bool colors); // not thread-safe
|
void gpt_log_set_colors (struct gpt_log * log, bool colors); // not thread-safe
|
||||||
void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps);
|
void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps);
|
||||||
|
|
||||||
|
// helper macros for logging
|
||||||
|
// use these to avoid computing log arguments if the verbosity is lower than the threshold
|
||||||
|
//
|
||||||
|
// for example:
|
||||||
|
//
|
||||||
|
// LOG_DBG("this is a debug message: %d\n", expensive_function());
|
||||||
|
//
|
||||||
|
// this will avoid calling expensive_function() if the verbosity is lower than LOG_DEFAULT_DEBUG
|
||||||
|
//
|
||||||
|
|
||||||
#define LOG_TMPL(level, verbosity, ...) \
|
#define LOG_TMPL(level, verbosity, ...) \
|
||||||
do { \
|
do { \
|
||||||
if ((verbosity) <= gpt_log_verbosity_thold) { \
|
if ((verbosity) <= gpt_log_verbosity_thold) { \
|
||||||
|
|
|
@ -406,7 +406,7 @@ int main(int argc, char ** argv) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
g_verbose = (params.verbosity == 1);
|
g_verbose = (params.verbosity > 1);
|
||||||
try {
|
try {
|
||||||
lora_merge_ctx ctx(params.model, params.lora_adapters, params.lora_outfile, params.cpuparams.n_threads);
|
lora_merge_ctx ctx(params.model, params.lora_adapters, params.lora_outfile, params.cpuparams.n_threads);
|
||||||
ctx.run_merge();
|
ctx.run_merge();
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
static void print_usage(int, char ** argv) {
|
static void print_usage(int, char ** argv) {
|
||||||
LOG("\nexample usage:\n");
|
LOG("\nexample usage:\n");
|
||||||
LOG("\n %s \\\n"
|
LOG("\n %s \\\n"
|
||||||
" -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] [--verbosity 1] \\\n"
|
" -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n"
|
||||||
" [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
|
" [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
|
||||||
" [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]);
|
" [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]);
|
||||||
LOG("\n");
|
LOG("\n");
|
||||||
|
@ -129,9 +129,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
|
||||||
LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as);
|
LOG_ERR("%s: inconsistent size for %s (%d vs %d)\n", __func__, wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as);
|
||||||
exit(1); //GGML_ABORT("fatal error");
|
exit(1); //GGML_ABORT("fatal error");
|
||||||
}
|
}
|
||||||
if (m_params.verbosity > 1) {
|
LOG_DBGV(2, "%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[2], (int)src1->type);
|
||||||
printf("%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[2], (int)src1->type);
|
|
||||||
}
|
|
||||||
// loop over all possible experts, regardless if they are used or not in the batch
|
// loop over all possible experts, regardless if they are used or not in the batch
|
||||||
for (int ex = 0; ex < n_as; ++ex) {
|
for (int ex = 0; ex < n_as; ++ex) {
|
||||||
size_t e_start = ex*src1->ne[0];
|
size_t e_start = ex*src1->ne[0];
|
||||||
|
@ -180,9 +178,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
|
||||||
exit(1); //GGML_ABORT("fatal error");
|
exit(1); //GGML_ABORT("fatal error");
|
||||||
}
|
}
|
||||||
++e.ncall;
|
++e.ncall;
|
||||||
if (m_params.verbosity > 1) {
|
LOG_DBGV(2, "%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[1], (int)src1->type);
|
||||||
LOG("%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[1], (int)src1->type);
|
|
||||||
}
|
|
||||||
for (int row = 0; row < (int)src1->ne[1]; ++row) {
|
for (int row = 0; row < (int)src1->ne[1]; ++row) {
|
||||||
const float * x = data + row * src1->ne[0];
|
const float * x = data + row * src1->ne[0];
|
||||||
for (int j = 0; j < (int)src1->ne[0]; ++j) {
|
for (int j = 0; j < (int)src1->ne[0]; ++j) {
|
||||||
|
@ -292,21 +288,20 @@ void IMatrixCollector::save_imatrix(int ncall) const {
|
||||||
out.write(m_params.prompt_file.c_str(), len);
|
out.write(m_params.prompt_file.c_str(), len);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_params.verbosity > 0) {
|
LOGV(1, "\n");
|
||||||
LOG_INF("\n%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str());
|
LOG_DBGV(1, "%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str());
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IMatrixCollector::load_imatrix(const char * fname) {
|
bool IMatrixCollector::load_imatrix(const char * fname) {
|
||||||
std::ifstream in(fname, std::ios::binary);
|
std::ifstream in(fname, std::ios::binary);
|
||||||
if (!in) {
|
if (!in) {
|
||||||
printf("%s: failed to open %s\n",__func__, fname);
|
LOG_ERR("%s: failed to open %s\n",__func__, fname);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int n_entries;
|
int n_entries;
|
||||||
in.read((char*)&n_entries, sizeof(n_entries));
|
in.read((char*)&n_entries, sizeof(n_entries));
|
||||||
if (in.fail() || n_entries < 1) {
|
if (in.fail() || n_entries < 1) {
|
||||||
printf("%s: no data in file %s\n", __func__, fname);
|
LOG_ERR("%s: no data in file %s\n", __func__, fname);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < n_entries; ++i) {
|
for (int i = 0; i < n_entries; ++i) {
|
||||||
|
@ -314,7 +309,7 @@ bool IMatrixCollector::load_imatrix(const char * fname) {
|
||||||
std::vector<char> name_as_vec(len+1);
|
std::vector<char> name_as_vec(len+1);
|
||||||
in.read((char *)name_as_vec.data(), len);
|
in.read((char *)name_as_vec.data(), len);
|
||||||
if (in.fail()) {
|
if (in.fail()) {
|
||||||
printf("%s: failed reading name for entry %d from %s\n",__func__,i+1, fname);
|
LOG_ERR("%s: failed reading name for entry %d from %s\n",__func__,i+1, fname);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
name_as_vec[len] = 0;
|
name_as_vec[len] = 0;
|
||||||
|
@ -325,7 +320,7 @@ bool IMatrixCollector::load_imatrix(const char * fname) {
|
||||||
int nval;
|
int nval;
|
||||||
in.read((char *)&nval, sizeof(nval));
|
in.read((char *)&nval, sizeof(nval));
|
||||||
if (in.fail() || nval < 1) {
|
if (in.fail() || nval < 1) {
|
||||||
printf("%s: failed reading number of values for entry %d\n",__func__,i);
|
LOG_ERR("%s: failed reading number of values for entry %d\n",__func__,i);
|
||||||
m_stats = {};
|
m_stats = {};
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -338,7 +333,7 @@ bool IMatrixCollector::load_imatrix(const char * fname) {
|
||||||
std::vector<float> tmp(nval);
|
std::vector<float> tmp(nval);
|
||||||
in.read((char*)tmp.data(), nval*sizeof(float));
|
in.read((char*)tmp.data(), nval*sizeof(float));
|
||||||
if (in.fail()) {
|
if (in.fail()) {
|
||||||
printf("%s: failed reading data for entry %d\n",__func__,i);
|
LOG_ERR("%s: failed reading data for entry %d\n",__func__,i);
|
||||||
m_stats = {};
|
m_stats = {};
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -548,13 +543,13 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
|
||||||
workers, nll, nll2, logit_history.data() + start + first, prob_history.data() + start + first);
|
workers, nll, nll2, logit_history.data() + start + first, prob_history.data() + start + first);
|
||||||
count += n_ctx - first - 1;
|
count += n_ctx - first - 1;
|
||||||
|
|
||||||
printf("[%d]%.4lf,", i + 1, std::exp(nll / count));
|
LOG("[%d]%.4lf,", i + 1, std::exp(nll / count));
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
|
|
||||||
logits.clear();
|
logits.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
printf("\n");
|
LOG("\n");
|
||||||
|
|
||||||
if (params.compute_ppl) {
|
if (params.compute_ppl) {
|
||||||
nll2 /= count;
|
nll2 /= count;
|
||||||
|
@ -563,9 +558,9 @@ static bool compute_imatrix(llama_context * ctx, const gpt_params & params) {
|
||||||
nll2 -= nll * nll;
|
nll2 -= nll * nll;
|
||||||
if (nll2 > 0) {
|
if (nll2 > 0) {
|
||||||
nll2 = sqrt(nll2/(count-1));
|
nll2 = sqrt(nll2/(count-1));
|
||||||
printf("Final estimate: PPL = %.4lf +/- %.5lf\n", ppl, nll2*ppl);
|
LOG("Final estimate: PPL = %.4lf +/- %.5lf\n", ppl, nll2*ppl);
|
||||||
} else {
|
} else {
|
||||||
printf("Unexpected negative standard deviation of log(prob)\n");
|
LOG("Unexpected negative standard deviation of log(prob)\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -577,7 +572,6 @@ int main(int argc, char ** argv) {
|
||||||
|
|
||||||
params.n_ctx = 512;
|
params.n_ctx = 512;
|
||||||
params.logits_all = true;
|
params.logits_all = true;
|
||||||
params.verbosity = 1;
|
|
||||||
|
|
||||||
if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_IMATRIX, print_usage)) {
|
if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_IMATRIX, print_usage)) {
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -590,7 +584,7 @@ int main(int argc, char ** argv) {
|
||||||
g_collector.set_params(params);
|
g_collector.set_params(params);
|
||||||
|
|
||||||
for (const auto & in_file : params.in_files) {
|
for (const auto & in_file : params.in_files) {
|
||||||
printf("%s : loading imatrix from '%s'\n", __func__, in_file.c_str());
|
LOG_INF("%s : loading imatrix from '%s'\n", __func__, in_file.c_str());
|
||||||
if (!g_collector.load_imatrix(in_file.c_str())) {
|
if (!g_collector.load_imatrix(in_file.c_str())) {
|
||||||
LOG_ERR("%s : failed to load %s\n", __func__, in_file.c_str());
|
LOG_ERR("%s : failed to load %s\n", __func__, in_file.c_str());
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -598,7 +592,7 @@ int main(int argc, char ** argv) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (params.in_files.size() > 1) {
|
if (params.in_files.size() > 1) {
|
||||||
printf("%s : saving combined imatrix to '%s'\n", __func__, params.out_file.c_str());
|
LOG_INF("%s : saving combined imatrix to '%s'\n", __func__, params.out_file.c_str());
|
||||||
g_collector.save_imatrix();
|
g_collector.save_imatrix();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -85,7 +85,7 @@ int main(void) {
|
||||||
|
|
||||||
argv = {"binary_name", "--verbose"};
|
argv = {"binary_name", "--verbose"};
|
||||||
assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
|
assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
|
||||||
assert(params.verbosity == 1);
|
assert(params.verbosity > 1);
|
||||||
|
|
||||||
argv = {"binary_name", "-m", "abc.gguf", "--predict", "6789", "--batch-size", "9090"};
|
argv = {"binary_name", "-m", "abc.gguf", "--predict", "6789", "--batch-size", "9090"};
|
||||||
assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
|
assert(true == gpt_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
|
||||||
|
|
|
@ -5,11 +5,12 @@
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
const int n_thread = 8;
|
const int n_thread = 8;
|
||||||
const int n_msg = 1000;
|
|
||||||
|
|
||||||
std::thread threads[n_thread];
|
std::thread threads[n_thread];
|
||||||
for (int i = 0; i < n_thread; i++) {
|
for (int i = 0; i < n_thread; i++) {
|
||||||
threads[i] = std::thread([i, n_msg]() {
|
threads[i] = std::thread([i]() {
|
||||||
|
const int n_msg = 1000;
|
||||||
|
|
||||||
for (int j = 0; j < n_msg; j++) {
|
for (int j = 0; j < n_msg; j++) {
|
||||||
const int log_type = std::rand() % 4;
|
const int log_type = std::rand() % 4;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue