llama : add support for the cohere2 model architecture (#10900)
This commit is contained in:
parent
78c6785175
commit
46be942214
6 changed files with 221 additions and 0 deletions
|
@@ -786,6 +786,16 @@ void llm_load_hparams(llama_model_loader & ml, llama_model & model) {
|
|||
default: model.type = e_model::MODEL_UNKNOWN;
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_COHERE2:
|
||||
{
|
||||
ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
|
||||
ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale);
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
|
||||
switch (hparams.n_layer) {
|
||||
case 32: model.type = e_model::MODEL_8B; break;
|
||||
default: model.type = e_model::MODEL_UNKNOWN;
|
||||
}
|
||||
} break;
|
||||
case LLM_ARCH_DBRX:
|
||||
{
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
|
||||
|
@@ -2031,6 +2041,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
|
|||
case LLM_ARCH_MINICPM:
|
||||
case LLM_ARCH_XVERSE:
|
||||
case LLM_ARCH_COMMAND_R:
|
||||
case LLM_ARCH_COHERE2:
|
||||
case LLM_ARCH_OLMO:
|
||||
case LLM_ARCH_ARCTIC:
|
||||
case LLM_ARCH_DEEPSEEK:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue