llama : better model naming and size reporting
commit 176ea716b3
parent e7299656bd

3 changed files with 28 additions and 30 deletions
convert-falcon-hf-to-gguf.py

@@ -94,7 +94,7 @@ print("gguf: get model metadata")
 
 block_count = hparams["n_layer"]
 
-gguf_writer.add_name(last_dir)
+gguf_writer.add_name("Falcon")
 gguf_writer.add_context_length(2048) # not in config.json
 gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
 gguf_writer.add_embedding_length(hparams["hidden_size"])
convert.py

@@ -733,7 +733,11 @@ class OutputFile:
         self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
 
     def add_meta_arch(self, params: Params) -> None:
-        self.gguf.add_name                ("LLaMA")
+        ver = None
+        if (params.n_ctx == 4096):
+            ver = "v2"
+
+        self.gguf.add_name                ("LLaMA" if ver == None else "LLaMA " + ver)
         self.gguf.add_context_length      (params.n_ctx)
         self.gguf.add_embedding_length    (params.n_embd)
         self.gguf.add_block_count         (params.n_layer)
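The new version check in add_meta_arch keys entirely off the training context length: LLaMA v1 checkpoints were trained with a 2048-token context, v2 with 4096, so n_ctx == 4096 is taken as a v2 marker when choosing the GGUF "general.name" value. A minimal standalone sketch of the same heuristic, written in C++ to match the examples below (the helper name infer_llama_name is hypothetical):

#include <cstdint>
#include <cstdio>
#include <string>

// Hypothetical helper mirroring add_meta_arch() above: a 4096-token
// training context is treated as evidence of a LLaMA v2 checkpoint.
static std::string infer_llama_name(uint32_t n_ctx) {
    return n_ctx == 4096 ? "LLaMA v2" : "LLaMA";
}

int main() {
    printf("%s\n", infer_llama_name(2048).c_str()); // LLaMA
    printf("%s\n", infer_llama_name(4096).c_str()); // LLaMA v2
    return 0;
}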
llama.cpp (50 changed lines)

@@ -811,6 +811,7 @@ enum e_model {
     MODEL_7B,
     MODEL_13B,
     MODEL_30B,
+    MODEL_40B,
     MODEL_65B,
     MODEL_70B,
 };

@@ -1489,9 +1490,10 @@ static const char * llama_model_type_name(e_model type) {
         case MODEL_7B: return "7B";
         case MODEL_13B: return "13B";
         case MODEL_30B: return "30B";
+        case MODEL_40B: return "40B";
         case MODEL_65B: return "65B";
         case MODEL_70B: return "70B";
-        default: GGML_ASSERT(false);
+        default: return "?B";
     }
 }
 
@ -1555,40 +1557,29 @@ static void llm_load_hparams(
|
||||||
case LLM_ARCH_LLAMA:
|
case LLM_ARCH_LLAMA:
|
||||||
{
|
{
|
||||||
GGUF_GET_KEY(ctx, hparams.f_norm_rms_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS));
|
GGUF_GET_KEY(ctx, hparams.f_norm_rms_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS));
|
||||||
|
|
||||||
|
switch (hparams.n_layer) {
|
||||||
|
case 26: model.type = e_model::MODEL_3B; break;
|
||||||
|
case 32: model.type = e_model::MODEL_7B; break;
|
||||||
|
case 40: model.type = e_model::MODEL_13B; break;
|
||||||
|
case 60: model.type = e_model::MODEL_30B; break;
|
||||||
|
case 80: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B; break;
|
||||||
|
default: model.type = e_model::MODEL_UNKNOWN;
|
||||||
|
}
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_FALCON:
|
case LLM_ARCH_FALCON:
|
||||||
{
|
{
|
||||||
GGUF_GET_KEY(ctx, hparams.f_norm_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS));
|
GGUF_GET_KEY(ctx, hparams.f_norm_eps, gguf_get_val_f32, GGUF_TYPE_FLOAT32, true, kv(LLM_KV_ATTENTION_LAYERNORM_EPS));
|
||||||
|
|
||||||
|
switch (hparams.n_layer) {
|
||||||
|
case 32: model.type = e_model::MODEL_7B; break;
|
||||||
|
case 60: model.type = e_model::MODEL_40B; break;
|
||||||
|
default: model.type = e_model::MODEL_UNKNOWN;
|
||||||
|
}
|
||||||
} break;
|
} break;
|
||||||
default: (void)0;
|
default: (void)0;
|
||||||
};
|
};
|
||||||
|
|
||||||
// TODO: generalize to non-LLaMA models
|
|
||||||
switch (hparams.n_layer) {
|
|
||||||
case 26: model.type = e_model::MODEL_3B; break;
|
|
||||||
case 32: model.type = e_model::MODEL_7B; break;
|
|
||||||
case 40: model.type = e_model::MODEL_13B; break;
|
|
||||||
case 60: model.type = e_model::MODEL_30B; break;
|
|
||||||
case 80: model.type = e_model::MODEL_65B; break;
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
if (hparams.n_layer < 32) {
|
|
||||||
model.type = e_model::MODEL_7B;
|
|
||||||
}
|
|
||||||
} break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// LLaMAv2
|
|
||||||
// TODO: probably not needed
|
|
||||||
{
|
|
||||||
const auto n_gqa = hparams.n_gqa();
|
|
||||||
|
|
||||||
if (model.type == e_model::MODEL_65B && n_gqa == 8) {
|
|
||||||
LLAMA_LOG_WARN("%s: assuming 70B model based on GQA == %d\n", __func__, n_gqa);
|
|
||||||
model.type = e_model::MODEL_70B;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
model.ftype = ml.ftype;
|
model.ftype = ml.ftype;
|
||||||
|
|
||||||
hparams.n_ctx = n_ctx;
|
hparams.n_ctx = n_ctx;
|
||||||
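LLaMA v1 65B and LLaMA v2 70B both have 80 layers, so layer count alone cannot separate them; the new per-arch switch instead falls back on the attention-head layout, replacing the old post-hoc GQA warning. A standalone sketch of that rule (hparams_sketch and classify_80_layer are hypothetical names; the head counts are the publicly documented values used as test inputs):

#include <cstdint>
#include <cstdio>

struct hparams_sketch {
    uint32_t n_layer;
    uint32_t n_head;
    uint32_t n_head_kv;
};

// LLaMA v1 65B uses plain multi-head attention (n_head == n_head_kv),
// while LLaMA v2 70B uses grouped-query attention with fewer KV heads,
// so the two 80-layer models can be told apart.
static const char * classify_80_layer(const hparams_sketch & hp) {
    if (hp.n_layer != 80) {
        return "?B";
    }
    return hp.n_head == hp.n_head_kv ? "65B" : "70B";
}

int main() {
    const hparams_sketch v1_65b = { 80, 64, 64 }; // MHA: one KV head per query head
    const hparams_sketch v2_70b = { 80, 64,  8 }; // GQA: 64 query heads, 8 KV heads

    printf("v1 -> %s\n", classify_80_layer(v1_65b)); // prints "v1 -> 65B"
    printf("v2 -> %s\n", classify_80_layer(v2_70b)); // prints "v2 -> 70B"
    return 0;
}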
@@ -5015,7 +5006,10 @@ int llama_model_n_embd(const struct llama_model * model) {
 }
 
 int llama_model_type(const struct llama_model * model, char * buf, size_t buf_size) {
-    return snprintf(buf, buf_size, "LLaMA %s %s", llama_model_type_name(model->type), llama_model_ftype_name(model->ftype).c_str());
+    return snprintf(buf, buf_size, "%s %s %s",
+            model->name.c_str(),
+            llama_model_type_name(model->type),
+            llama_model_ftype_name(model->ftype).c_str());
 }
 
 int llama_model_quantize(
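With the format string changed to include model->name, the reported description becomes e.g. "LLaMA v2 7B mostly Q4_0" instead of always starting with a hardcoded "LLaMA". A usage sketch, assuming the llama.h API as of this commit (llama_backend_init, llama_load_model_from_file, llama_model_type; signatures changed in later versions):

#include <cstdio>
#include "llama.h"

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    llama_backend_init(false /* numa */);

    // Load the model with default parameters.
    const llama_context_params params = llama_context_default_params();
    llama_model * model = llama_load_model_from_file(argv[1], params);
    if (model == NULL) {
        fprintf(stderr, "failed to load %s\n", argv[1]);
        return 1;
    }

    // Print the "<name> <size> <ftype>" description built by llama_model_type().
    char desc[128];
    llama_model_type(model, desc, sizeof(desc));
    printf("%s\n", desc);

    llama_free_model(model);
    llama_backend_free();
    return 0;
}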