common : reimplement logging (#9418)

https://github.com/ggerganov/llama.cpp/pull/9418
This commit is contained in:
Georgi Gerganov 2024-09-15 20:46:12 +03:00 committed by GitHub
parent e6deac31f7
commit 6262d13e0b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
54 changed files with 2092 additions and 2419 deletions

View file

@ -1,6 +1,7 @@
#include "arg.h"
#include "common.h"
#include "sampling.h"
#include "log.h"
#include "llama.h"
#include <cstdio>
@ -42,18 +43,14 @@ int main(int argc, char ** argv) {
return 1;
}
gpt_init();
const int W = 15; // lookahead window
const int N = 5; // n-gram size
const int G = 15; // max verification n-grams
const bool dump_kv_cache = params.dump_kv_cache;
#ifndef LOG_DISABLE_LOGS
log_set_target(log_filename_generator("lookahead", "log"));
LOG_TEE("Log start\n");
log_dump_cmdline(argc, argv);
#endif // LOG_DISABLE_LOGS
// init llama.cpp
llama_backend_init();
llama_numa_init(params.numa);
@ -75,14 +72,14 @@ int main(int argc, char ** argv) {
const int max_tokens_list_size = max_context_size - 4;
if ((int) inp.size() > max_tokens_list_size) {
fprintf(stderr, "%s: error: prompt too long (%d tokens, max %d)\n", __func__, (int) inp.size(), max_tokens_list_size);
LOG_ERR("%s: prompt too long (%d tokens, max %d)\n", __func__, (int) inp.size(), max_tokens_list_size);
return 1;
}
fprintf(stderr, "\n\n");
LOG("\n\n");
for (auto id : inp) {
fprintf(stderr, "%s", llama_token_to_piece(ctx, id).c_str());
LOG("%s", llama_token_to_piece(ctx, id).c_str());
}
fflush(stderr);
@ -166,7 +163,7 @@ int main(int argc, char ** argv) {
{
const std::string token_str = llama_token_to_piece(ctx, id);
printf("%s", token_str.c_str());
LOG("%s", token_str.c_str());
fflush(stdout);
}
}
@ -256,7 +253,7 @@ int main(int argc, char ** argv) {
}
if (llama_decode(ctx, batch) != 0) {
fprintf(stderr, "\n\n%s: error: llama_decode failed - increase KV cache size\n", __func__);
LOG_ERR("\n\n%s: llama_decode failed - increase KV cache size\n", __func__);
return 1;
}
@ -293,10 +290,10 @@ int main(int argc, char ** argv) {
const std::string token_str = llama_token_to_piece(ctx, id);
if (v == 0) {
printf("%s", token_str.c_str());
LOG("%s", token_str.c_str());
} else {
// print light cyan
printf("\033[0;96m%s\033[0m", token_str.c_str());
LOG("\033[0;96m%s\033[0m", token_str.c_str());
}
fflush(stdout);
@ -330,21 +327,21 @@ int main(int argc, char ** argv) {
// print known n-grams starting with token id (debug)
if (0 && v == 0) {
if (ngrams_observed.cnt[id] > 0) {
printf("\n - %d n-grams starting with '%s'\n", ngrams_observed.cnt[id], llama_token_to_piece(ctx, id).c_str());
LOG("\n - %d n-grams starting with '%s'\n", ngrams_observed.cnt[id], llama_token_to_piece(ctx, id).c_str());
}
for (int i = 0; i < ngrams_observed.cnt[id]; i++) {
printf(" - ngram %2d: ", i);
LOG(" - ngram %2d: ", i);
const int idx = id*(N - 1)*G + i*(N - 1);
for (int j = 0; j < N - 1; j++) {
const std::string token_str = llama_token_to_piece(ctx, ngrams_observed.tokens[idx + j]);
printf("%s", token_str.c_str());
LOG("%s", token_str.c_str());
}
printf("\n");
LOG("\n");
}
}
@ -455,20 +452,20 @@ int main(int argc, char ** argv) {
auto t_dec_end = ggml_time_us();
LOG_TEE("\n\n");
LOG("\n\n");
LOG_TEE("encoded %4d tokens in %8.3f seconds, speed: %8.3f t/s\n", n_input, (t_enc_end - t_enc_start) / 1e6f, inp.size() / ((t_enc_end - t_enc_start) / 1e6f));
LOG_TEE("decoded %4d tokens in %8.3f seconds, speed: %8.3f t/s\n", n_predict, (t_dec_end - t_dec_start) / 1e6f, n_predict / ((t_dec_end - t_dec_start) / 1e6f));
LOG_INF("encoded %4d tokens in %8.3f seconds, speed: %8.3f t/s\n", n_input, (t_enc_end - t_enc_start) / 1e6f, inp.size() / ((t_enc_end - t_enc_start) / 1e6f));
LOG_INF("decoded %4d tokens in %8.3f seconds, speed: %8.3f t/s\n", n_predict, (t_dec_end - t_dec_start) / 1e6f, n_predict / ((t_dec_end - t_dec_start) / 1e6f));
LOG_TEE("\n");
LOG_TEE("W = %2d\n", W);
LOG_TEE("N = %2d\n", N);
LOG_TEE("G = %2d\n", G);
LOG_TEE("\n");
LOG_TEE("n_predict = %d\n", n_predict);
LOG_TEE("n_accept = %d\n", n_accept);
LOG_INF("\n");
LOG_INF("W = %2d\n", W);
LOG_INF("N = %2d\n", N);
LOG_INF("G = %2d\n", G);
LOG_INF("\n");
LOG_INF("n_predict = %d\n", n_predict);
LOG_INF("n_accept = %d\n", n_accept);
LOG_TEE("\n");
LOG_INF("\n");
gpt_perf_print(ctx, smpl);
gpt_sampler_free(smpl);
@ -482,7 +479,7 @@ int main(int argc, char ** argv) {
llama_backend_free();
fprintf(stderr, "\n\n");
LOG("\n\n");
return 0;
}