From e524750a6c23dbe238649ea528f2b9a949b3c499 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 16 Aug 2023 14:24:04 +0300
Subject: [PATCH] llama : improve printing + log meta data

---
 ggml.c         | 17 ++++++++++++++
 ggml.h         |  2 ++
 gguf-llama.cpp | 61 +++++++++++++++++++++++++++++++++++---------------
 3 files changed, 62 insertions(+), 18 deletions(-)

diff --git a/ggml.c b/ggml.c
index 261695216..77f57a3fd 100644
--- a/ggml.c
+++ b/ggml.c
@@ -18583,6 +18583,19 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
 };
 static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
 
+static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
+    [GGUF_TYPE_UINT8]   = "uint8",
+    [GGUF_TYPE_INT8]    = "int8",
+    [GGUF_TYPE_UINT16]  = "uint16",
+    [GGUF_TYPE_INT16]   = "int16",
+    [GGUF_TYPE_UINT32]  = "uint32",
+    [GGUF_TYPE_INT32]   = "int32",
+    [GGUF_TYPE_FLOAT32] = "float32",
+    [GGUF_TYPE_BOOL]    = "bool",
+    [GGUF_TYPE_STRING]  = "string",
+    [GGUF_TYPE_ARRAY]   = "array",
+};
+
 union gguf_value {
     uint8_t  uint8;
     int8_t   int8;
@@ -19017,6 +19030,10 @@ void gguf_free(struct gguf_context * ctx) {
     GGML_ALIGNED_FREE(ctx);
 }
 
+const char * gguf_type_name(enum gguf_type type) {
+    return GGUF_TYPE_NAME[type];
+}
+
 int gguf_get_version(struct gguf_context * ctx) {
     return ctx->header.version;
 }
diff --git a/ggml.h b/ggml.h
index 48ce71ecd..ad12c133e 100644
--- a/ggml.h
+++ b/ggml.h
@@ -1740,6 +1740,8 @@ extern "C" {
 
     GGML_API void gguf_free(struct gguf_context * ctx);
 
+    GGML_API const char * gguf_type_name(enum gguf_type type);
+
     GGML_API int    gguf_get_version    (struct gguf_context * ctx);
     GGML_API size_t gguf_get_alignment  (struct gguf_context * ctx);
     GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
diff --git a/gguf-llama.cpp b/gguf-llama.cpp
index ec64ef8dc..2b197a236 100644
--- a/gguf-llama.cpp
+++ b/gguf-llama.cpp
@@ -101,11 +101,21 @@
 #define TN_FFN_DOWN "blk.%d.ffn_down.weight"
 #define TN_FFN_UP   "blk.%d.ffn_up.weight"
 
+#ifdef __GNUC__
+#ifdef __MINGW32__
+#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#else
+#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#endif
+#else
+#define LLAMA_ATTRIBUTE_FORMAT(...)
+#endif
+
 //
 // logging
 //
-
-static void llama_log_internal(llama_log_level level, const char* format, ...);
+LLAMA_ATTRIBUTE_FORMAT(2, 3)
+static void llama_log_internal        (llama_log_level level, const char* format, ...);
 static void llama_log_callback_default(llama_log_level level, const char * text, void * user_data);
 
 #define LLAMA_LOG_INFO(...)  llama_log_internal(LLAMA_LOG_LEVEL_INFO , __VA_ARGS__)
@@ -130,13 +140,7 @@ static void zeros(std::ofstream & file, size_t n) {
     }
 }
 
-#ifdef __GNUC__
-#ifdef __MINGW32__
-__attribute__((format(gnu_printf, 1, 2)))
-#else
-__attribute__((format(printf, 1, 2)))
-#endif
-#endif
+LLAMA_ATTRIBUTE_FORMAT(1, 2)
 static std::string format(const char * fmt, ...) {
     va_list ap;
     va_list ap2;
@@ -991,7 +995,7 @@ static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
     char buf[256];
     snprintf(buf, sizeof(buf), "%5u", ne.at(0));
     for (size_t i = 1; i < ne.size(); i++) {
-        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " x %5u", ne.at(i));
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5u", ne.at(i));
     }
     return buf;
 }
@@ -999,13 +1003,14 @@ static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
 static std::string llama_format_tensor_shape(const struct ggml_tensor * t) {
     char buf[256];
     snprintf(buf, sizeof(buf), "%5" PRId64, t->ne[0]);
-    for (int i = 1; i < t->n_dims; i++) {
-        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " x %5" PRId64, t->ne[i]);
+    for (int i = 1; i < GGML_MAX_DIMS; i++) {
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5" PRId64, t->ne[i]);
     }
     return buf;
 }
 
 struct llama_model_loader {
+    int n_kv      = 0;
     int n_tensors = 0;
     int n_created = 0;
 
@@ -1027,11 +1032,31 @@ struct llama_model_loader {
 
         ctx_gguf = gguf_init_from_file(fname.c_str(), params);
 
+        n_kv      = gguf_get_n_kv(ctx_gguf);
         n_tensors = gguf_get_n_tensors(ctx_gguf);
+
         file_version = (enum llama_file_version) gguf_get_version(ctx_gguf);
 
-        LLAMA_LOG_INFO("%s: loaded %d tensors from %s (version %s)\n",
-                __func__, n_tensors, fname.c_str(), llama_file_version_name(file_version));
+        // print meta data
+        // TODO: make optional
+        {
+            LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
+                    __func__, n_kv, n_tensors, fname.c_str(), llama_file_version_name(file_version));
+
+            for (int i = 0; i < n_kv; i++) {
+                const char * name         = gguf_get_key(ctx_gguf, i);
+                const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
+
+                LLAMA_LOG_INFO("%s: - %3d: %42s %-8s\n", __func__, i, name, gguf_type_name(type));
+            }
+
+            for (int i = 0; i < n_tensors; i++) {
+                const char * name         = gguf_get_tensor_name(ctx_gguf, i);
+                struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, name);
+
+                LLAMA_LOG_INFO("%s: - %3d: %32s %-8s [ %s ]\n", __func__, i, name, ggml_type_name(meta->type), llama_format_tensor_shape(meta).c_str());
+            }
+        }
 
         if (!llama_mmap::SUPPORTED) {
             LLAMA_LOG_WARN("%s: mmap is not supported on this platform\n", __func__);
@@ -1281,7 +1306,7 @@ static void llama_model_load_internal(
     if (kid >= 0) { \
         enum gguf_type ktype = gguf_get_kv_type(ctx, kid); \
         if (ktype != (type)) { \
-            throw std::runtime_error(format("key %s has wrong type: %d", key, ktype)); \
+            throw std::runtime_error(format("key %s has wrong type: %s", key, gguf_type_name(ktype))); \
         } \
         (dst) = func(ctx, kid); \
     } else if (req) { \
@@ -1325,7 +1350,7 @@ static void llama_model_load_internal(
         const auto n_gqa = hparams.n_gqa();
 
         if (model.type == e_model::MODEL_65B && n_gqa == 8) {
-            fprintf(stderr, "%s: warning: assuming 70B model based on GQA == %d\n", __func__, n_gqa);
+            LLAMA_LOG_WARN("%s: assuming 70B model based on GQA == %d\n", __func__, n_gqa);
             model.type = e_model::MODEL_70B;
         }
     }
@@ -3399,7 +3424,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
        return i_layer < num_layers/8 || i_layer >= 7*num_layers/8 || (i_layer - num_layers/8)%3 == 2;
     };
 
-    size_t idx = 0;
+    int idx = 0;
 
     std::vector<uint8_t> read_data;
     std::vector<uint8_t> work;
@@ -3428,7 +3453,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         tensor->data = read_data.data();
         model_loader->load_data_for(tensor);
 
-        LLAMA_LOG_INFO("[%4zu/%4zu] %36s - [%s], type = %6s, ",
+        LLAMA_LOG_INFO("[%4d/%4d] %36s - [%s], type = %6s, ",
                ++idx, model_loader->n_tensors,
                ggml_get_name(tensor),
                llama_format_tensor_shape(tensor).c_str(),
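
For reference, a minimal standalone sketch (not part of the commit) of driving the same meta-data dump that the loader change above adds, using only API calls that appear in this patch plus ggml_free. The gguf_init_params field names (no_alloc, ctx) are assumed from ggml.h and are not shown in this diff.

// dump-gguf-meta.c - list GGUF key-value pairs and tensors, sketch only
#include <stdio.h>

#include "ggml.h"

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    struct ggml_context * ctx_meta = NULL;

    // no_alloc = true: read only tensor meta data, not the weights
    struct gguf_init_params params = {
        /*.no_alloc = */ true,
        /*.ctx      = */ &ctx_meta,
    };

    struct gguf_context * ctx = gguf_init_from_file(argv[1], params);
    if (!ctx) {
        fprintf(stderr, "failed to load %s\n", argv[1]);
        return 1;
    }

    printf("version: %d, kv pairs: %d, tensors: %d\n",
            gguf_get_version(ctx), gguf_get_n_kv(ctx), gguf_get_n_tensors(ctx));

    // key-value pairs: key name + value type name via the new gguf_type_name()
    for (int i = 0; i < gguf_get_n_kv(ctx); i++) {
        printf("kv     %3d: %42s %-8s\n", i,
                gguf_get_key(ctx, i), gguf_type_name(gguf_get_kv_type(ctx, i)));
    }

    // tensors: name + ggml type (the meta tensors live in ctx_meta)
    for (int i = 0; i < gguf_get_n_tensors(ctx); i++) {
        const char * name = gguf_get_tensor_name(ctx, i);
        struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, name);

        printf("tensor %3d: %32s %-8s\n", i, name, ggml_type_name(meta->type));
    }

    ggml_free(ctx_meta);
    gguf_free(ctx);

    return 0;
}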