llama : improve printing + log meta data

commit e524750a6c
parent f634b292c9

3 changed files with 62 additions and 18 deletions
ggml.c (17 changes)

@@ -18583,6 +18583,19 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
 };
 static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
 
+static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
+    [GGUF_TYPE_UINT8]   = "uint8",
+    [GGUF_TYPE_INT8]    = "int8",
+    [GGUF_TYPE_UINT16]  = "uint16",
+    [GGUF_TYPE_INT16]   = "int16",
+    [GGUF_TYPE_UINT32]  = "uint32",
+    [GGUF_TYPE_INT32]   = "int32",
+    [GGUF_TYPE_FLOAT32] = "float32",
+    [GGUF_TYPE_BOOL]    = "bool",
+    [GGUF_TYPE_STRING]  = "string",
+    [GGUF_TYPE_ARRAY]   = "array",
+};
+
 union gguf_value {
     uint8_t  uint8;
     int8_t   int8;

@@ -19017,6 +19030,10 @@ void gguf_free(struct gguf_context * ctx) {
     GGML_ALIGNED_FREE(ctx);
 }
 
+const char * gguf_type_name(enum gguf_type type) {
+    return GGUF_TYPE_NAME[type];
+}
+
 int gguf_get_version(struct gguf_context * ctx) {
     return ctx->header.version;
 }
ggml.h (2 changes)

@@ -1740,6 +1740,8 @@
 
     GGML_API void gguf_free(struct gguf_context * ctx);
 
+    GGML_API const char * gguf_type_name(enum gguf_type type);
+
     GGML_API int    gguf_get_version    (struct gguf_context * ctx);
     GGML_API size_t gguf_get_alignment  (struct gguf_context * ctx);
     GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
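The new gguf_type_name() helper pairs with the existing GGUF key accessors and is what the loader below uses to print a readable type for each metadata key. A minimal sketch of driving the same kind of dump through the public API (not part of this commit; it assumes the gguf_init_params aggregate with the no_alloc and ctx fields that the loader passes to gguf_init_from_file):

#include <cstdio>

#include "ggml.h"

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    struct ggml_context * ctx_meta = NULL;

    struct gguf_init_params params = {
        /*.no_alloc =*/ true,      // read metadata only, do not allocate tensor data
        /*.ctx      =*/ &ctx_meta, // receive a ggml context holding tensor metadata
    };

    struct gguf_context * ctx = gguf_init_from_file(argv[1], params);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load %s\n", argv[1]);
        return 1;
    }

    // same idea as the loader's meta data dump: key name plus human-readable type
    for (int i = 0; i < gguf_get_n_kv(ctx); i++) {
        printf("%3d: %-40s %s\n", i, gguf_get_key(ctx, i), gguf_type_name(gguf_get_kv_type(ctx, i)));
    }

    if (ctx_meta) {
        ggml_free(ctx_meta);
    }
    gguf_free(ctx);
    return 0;
}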
llama.cpp

@@ -101,11 +101,21 @@
 #define TN_FFN_DOWN "blk.%d.ffn_down.weight"
 #define TN_FFN_UP   "blk.%d.ffn_up.weight"
 
+#ifdef __GNUC__
+#ifdef __MINGW32__
+#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#else
+#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#endif
+#else
+#define LLAMA_ATTRIBUTE_FORMAT(...)
+#endif
+
 //
 // logging
 //
 
+LLAMA_ATTRIBUTE_FORMAT(2, 3)
-static void llama_log_internal(llama_log_level level, const char* format, ...);
+static void llama_log_internal        (llama_log_level level, const char* format, ...);
 static void llama_log_callback_default(llama_log_level level, const char * text, void * user_data);
 
 #define LLAMA_LOG_INFO(...)    llama_log_internal(LLAMA_LOG_LEVEL_INFO , __VA_ARGS__)

@@ -130,13 +140,7 @@ static void zeros(std::ofstream & file, size_t n) {
     }
 }
 
-#ifdef __GNUC__
-#ifdef __MINGW32__
-__attribute__((format(gnu_printf, 1, 2)))
-#else
-__attribute__((format(printf, 1, 2)))
-#endif
-#endif
+LLAMA_ATTRIBUTE_FORMAT(1, 2)
 static std::string format(const char * fmt, ...) {
     va_list ap;
     va_list ap2;
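For readers unfamiliar with the attribute: with format(printf, ...) GCC and Clang check the variadic arguments against the printf-style format string at compile time, and the gnu_printf flavour is used on MinGW because plain printf there follows the Microsoft runtime's format rules. A small standalone sketch (not from this commit) of what the macro buys:

#include <cstdarg>
#include <cstdio>

#ifdef __GNUC__
#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#else
#define LLAMA_ATTRIBUTE_FORMAT(...)
#endif

// argument 1 is the format string, the checked variadic arguments start at position 2
LLAMA_ATTRIBUTE_FORMAT(1, 2)
static void log_sketch(const char * fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
}

int main() {
    log_sketch("loaded %d tensors\n", 291);     // ok: %d matches an int argument
    // log_sketch("loaded %s tensors\n", 291);  // with the attribute: -Wformat warning at compile time
    return 0;
}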
@@ -991,7 +995,7 @@ static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
     char buf[256];
     snprintf(buf, sizeof(buf), "%5u", ne.at(0));
     for (size_t i = 1; i < ne.size(); i++) {
-        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " x %5u", ne.at(i));
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5u", ne.at(i));
     }
     return buf;
 }

@@ -999,13 +1003,14 @@ static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
 static std::string llama_format_tensor_shape(const struct ggml_tensor * t) {
     char buf[256];
     snprintf(buf, sizeof(buf), "%5" PRId64, t->ne[0]);
-    for (int i = 1; i < t->n_dims; i++) {
-        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " x %5" PRId64, t->ne[i]);
+    for (int i = 1; i < GGML_MAX_DIMS; i++) {
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5" PRId64, t->ne[i]);
     }
     return buf;
 }
 
 struct llama_model_loader {
+    int n_kv      = 0;
     int n_tensors = 0;
     int n_created = 0;
 
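These two hunks change the shape separator from " x " to ", " and make the ggml_tensor overload walk all GGML_MAX_DIMS entries instead of stopping at n_dims. A standalone sketch of the new output format (not from this commit; the 4096 x 32000 shape is hypothetical, purely for illustration):

#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

// same scheme as the patched helpers: width-5 fields joined by ", "
static std::string format_shape(const std::vector<long long> & ne) {
    char buf[256];
    snprintf(buf, sizeof(buf), "%5lld", ne.at(0));
    for (size_t i = 1; i < ne.size(); i++) {
        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5lld", ne.at(i));
    }
    return buf;
}

int main() {
    printf("[ %s ]\n", format_shape({4096, 32000}).c_str());  // prints: [  4096, 32000 ]
    return 0;
}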
@@ -1027,11 +1032,31 @@ struct llama_model_loader {
 
         ctx_gguf = gguf_init_from_file(fname.c_str(), params);
 
+        n_kv      = gguf_get_n_kv(ctx_gguf);
         n_tensors = gguf_get_n_tensors(ctx_gguf);
 
         file_version = (enum llama_file_version) gguf_get_version(ctx_gguf);
 
-        LLAMA_LOG_INFO("%s: loaded %d tensors from %s (version %s)\n",
-                __func__, n_tensors, fname.c_str(), llama_file_version_name(file_version));
+        // print meta data
+        // TODO: make optional
+        {
+            LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
+                    __func__, n_kv, n_tensors, fname.c_str(), llama_file_version_name(file_version));
+
+            for (int i = 0; i < n_kv; i++) {
+                const char * name         = gguf_get_key(ctx_gguf, i);
+                const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
+
+                LLAMA_LOG_INFO("%s: - %3d: %42s %-8s\n", __func__, i, name, gguf_type_name(type));
+            }
+
+            for (int i = 0; i < n_tensors; i++) {
+                const char * name = gguf_get_tensor_name(ctx_gguf, i);
+                struct ggml_tensor * meta = ggml_get_tensor(ctx_meta, name);
+
+                LLAMA_LOG_INFO("%s: - %3d: %32s %-8s [ %s ]\n", __func__, i, name, ggml_type_name(meta->type), llama_format_tensor_shape(meta).c_str());
+            }
+        }
+
         if (!llama_mmap::SUPPORTED) {
             LLAMA_LOG_WARN("%s: mmap is not supported on this platform\n", __func__);
@@ -1281,7 +1306,7 @@ static void llama_model_load_internal(
     if (kid >= 0) { \
         enum gguf_type ktype = gguf_get_kv_type(ctx, kid); \
         if (ktype != (type)) { \
-            throw std::runtime_error(format("key %s has wrong type: %d", key, ktype)); \
+            throw std::runtime_error(format("key %s has wrong type: %s", key, gguf_type_name(ktype))); \
         } \
         (dst) = func(ctx, kid); \
     } else if (req) { \

@@ -1325,7 +1350,7 @@ static void llama_model_load_internal(
     const auto n_gqa = hparams.n_gqa();
 
     if (model.type == e_model::MODEL_65B && n_gqa == 8) {
-        fprintf(stderr, "%s: warning: assuming 70B model based on GQA == %d\n", __func__, n_gqa);
+        LLAMA_LOG_WARN("%s: assuming 70B model based on GQA == %d\n", __func__, n_gqa);
         model.type = e_model::MODEL_70B;
     }
 }
@@ -3399,7 +3424,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         return i_layer < num_layers/8 || i_layer >= 7*num_layers/8 || (i_layer - num_layers/8)%3 == 2;
     };
 
-    size_t idx = 0;
+    int idx = 0;
 
     std::vector<uint8_t> read_data;
    std::vector<uint8_t> work;

@@ -3428,7 +3453,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         tensor->data = read_data.data();
         model_loader->load_data_for(tensor);
 
-        LLAMA_LOG_INFO("[%4zu/%4zu] %36s - [%s], type = %6s, ",
+        LLAMA_LOG_INFO("[%4d/%4d] %36s - [%s], type = %6s, ",
                ++idx, model_loader->n_tensors,
                ggml_get_name(tensor),
                llama_format_tensor_shape(tensor).c_str(),
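A note on these last two hunks: once idx becomes an int (presumably to line up with the int n_tensors counter it is printed next to), "%4zu" no longer matches its type and "%4d" does. A tiny standalone sketch (not from this commit) of keeping printf specifiers in sync with variable types:

#include <cstdio>

int main() {
    int    idx    = 3;    // counts tensors, like the quantizer's idx after this change
    size_t offset = 1024; // sizes and offsets stay size_t and keep %zu

    printf("[%4d/%4d] processed\n", idx, 291); // int    -> %d
    printf("offset = %zu bytes\n", offset);    // size_t -> %zu; mismatching them is undefined behavior
    return 0;
}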