Convert remaining fprintf(stderr, ...) calls to use new macros.

Helmut 2023-07-23 19:16:43 +02:00
parent 21d1e8bab2
commit 33b4202403
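
For context, here is a minimal sketch of the kind of logging layer the LLAMA_LOG_* macros used in this diff could sit on. The function name llama_log_internal, the level values, and the newline handling below are assumptions made for illustration only; the actual definitions live elsewhere in llama.cpp and are not part of this commit.

// Sketch only: assumed shapes for the logging layer behind LLAMA_LOG_INFO/WARN/ERROR.
#include <cstdarg>
#include <cstdio>

enum llama_log_level {
    LLAMA_LOG_LEVEL_ERROR = 2,
    LLAMA_LOG_LEVEL_WARN  = 3,
    LLAMA_LOG_LEVEL_INFO  = 4,
};

// Hypothetical backing function: formats the message once and writes it to a
// single sink, so call sites no longer talk to stderr directly.
static void llama_log_internal(llama_log_level level, const char * format, ...) {
    char buf[1024];
    va_list args;
    va_start(args, format);
    vsnprintf(buf, sizeof(buf), format, args);
    va_end(args);
    const char * tag = (level == LLAMA_LOG_LEVEL_ERROR) ? "error" :
                       (level == LLAMA_LOG_LEVEL_WARN)  ? "warn"  : "info";
    // Assumption: the sink appends the trailing newline, which would explain why
    // most converted calls in this diff drop the "\n" from their format strings.
    fprintf(stderr, "[%s] %s\n", tag, buf);
}

#define LLAMA_LOG_INFO(...)  llama_log_internal(LLAMA_LOG_LEVEL_INFO,  __VA_ARGS__)
#define LLAMA_LOG_WARN(...)  llama_log_internal(LLAMA_LOG_LEVEL_WARN,  __VA_ARGS__)
#define LLAMA_LOG_ERROR(...) llama_log_internal(LLAMA_LOG_LEVEL_ERROR, __VA_ARGS__)

int main() {
    // Usage mirrors the converted call sites below.
    LLAMA_LOG_INFO("llama.cpp: saving model to %s", "model.bin");
    LLAMA_LOG_ERROR("%s: failed to quantize: %s", __func__, "example error");
    return 0;
}

Routing every message through one variadic function is what makes a mechanical conversion like this one worthwhile: the output destination (stderr in the sketch) can later be swapped for a user-supplied callback without touching any call site.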


@@ -590,7 +590,7 @@ struct llama_file_saver {
     llama_file_loader * any_file_loader;
     llama_file_saver(const char * fname, llama_file_loader * any_file_loader, enum llama_ftype new_ftype)
         : file(fname, "wb"), any_file_loader(any_file_loader) {
-        fprintf(stderr, "llama.cpp: saving model to %s\n", fname);
+        LLAMA_LOG_INFO("llama.cpp: saving model to %s", fname);
         write_magic();
         write_hparams(new_ftype);
         write_vocab();
@@ -611,7 +611,7 @@ struct llama_file_saver {
     }
     void write_vocab() {
         if (any_file_loader->file_version == LLAMA_FILE_VERSION_GGML) {
-            fprintf(stderr, "llama.cpp: WARNING: input is an old file that doesn't have scores; will add dummy scores\n");
+            LLAMA_LOG_WARN("llama.cpp: WARNING: input is an old file that doesn't have scores; will add dummy scores");
         }
         uint32_t n_vocab = any_file_loader->hparams.n_vocab;
         for (uint32_t i = 0; i < n_vocab; i++) {
@@ -802,7 +802,7 @@ struct llama_model_loader {
             uint8_t byte = lt.data[i];
             sum = byte + (sum << 6) + (sum << 16) - sum; // sdbm hash
         }
-        fprintf(stderr, "%s checksum: %#08x (%s, size %zu)\n", lt.name.c_str(), sum,
+        LLAMA_LOG_INFO("%s checksum: %#08x (%s, size %zu)", lt.name.c_str(), sum,
                 llama_format_tensor_shape(lt.ne).c_str(), lt.size);
     }
@@ -835,7 +835,7 @@ static bool kv_cache_init(
     cache.ctx = ggml_init(params);
     if (!cache.ctx) {
-        fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__);
+        LLAMA_LOG_ERROR("%s: failed to allocate memory for kv cache", __func__);
         return false;
     }
@@ -1107,7 +1107,7 @@ static void llama_model_load_internal(
 #define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
 #define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU_SPLIT
 #elif defined(GGML_USE_CLBLAST)
-    fprintf(stderr, "%s: using OpenCL for GPU acceleration", __func__);
+    LLAMA_LOG_INFO("%s: using OpenCL for GPU acceleration", __func__);
 #define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
 #define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU
 #else
@@ -2554,7 +2554,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             int nx = tensor.ne.at(0);
             int ny = tensor.ne.at(1);
             if (nx % QK_K != 0 || ny % QK_K != 0) {
-                fprintf(stderr, "\n\nTensor sizes %d x %d are not divisible by %d, required for k-quants.\n",nx,ny,QK_K);
+                LLAMA_LOG_INFO("\n\nTensor sizes %d x %d are not divisible by %d, required for k-quants.",nx,ny,QK_K);
                 convert_incompatible_tensor = true;
             }
         }
@@ -2586,10 +2586,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         if (convert_incompatible_tensor) {
             if (tensor.name == "output.weight") {
                 new_type = GGML_TYPE_F16; //fall back to F16 instead of just failing.
-                fprintf(stderr, "F16 will be used for this tensor instead.\n");
+                LLAMA_LOG_WARN("F16 will be used for this tensor instead.\n");
             } else if (tensor.name == "tok_embeddings.weight") {
                 new_type = GGML_TYPE_Q4_0; //fall back to Q4_0 instead of just failing.
-                fprintf(stderr, "Q4_0 will be used for this tensor instead.\n");
+                LLAMA_LOG_WARN("Q4_0 will be used for this tensor instead.\n");
             } else {
                 throw std::runtime_error("Unsupported tensor size encountered\n");
             }
@@ -2814,7 +2814,7 @@ struct llama_context * llama_new_context_with_model(
         const size_t max_size = ggml_get_max_tensor_size(ctx->model.ctx);
-        printf("%s: max tensor size = %8.2f MB\n", __func__, max_size/1024.0/1024.0);
+        LLAMA_LOG_INFO("%s: max tensor size = %8.2f MB\n", __func__, max_size/1024.0/1024.0);
 #define LLAMA_METAL_CHECK_BUF(result) \
     if (!(result)) { \
@@ -2877,7 +2877,7 @@ int llama_model_quantize(
         llama_model_quantize_internal(fname_inp, fname_out, params);
         return 0;
     } catch (const std::exception & err) {
-        fprintf(stderr, "%s: failed to quantize: %s\n", __func__, err.what());
+        LLAMA_LOG_ERROR("%s: failed to quantize: %s", __func__, err.what());
         return 1;
     }
 }