llama : add support for GLM-Edge and GLM-Edge-V series models (#10573)
* add glm edge chat model
* use config partial_rotary_factor as rope ratio
* support for glm edge model
* vision model support
* remove debug info
* fix format
* llava.cpp trailing whitespace
* remove unused AutoTokenizer
* Update src/llama.cpp to not contain <|end|> or </s>
  Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
* add edge template
* fix chat template
* fix conflict
* fix conflict
* fix ci err
* fix format err
* fix template err
* 9b hf chat support
* format
* format clip.cpp
* fix format
* Apply suggestions from code review
* Apply suggestions from code review
* Update examples/llava/clip.cpp
* fix format
* minor : style

---------

Co-authored-by: liyuhang <yuhang.li@zhipuai.cn>
Co-authored-by: piDack <pcdack@hotmail.co>
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
Co-authored-by: liyuhang <yuhang.li@aminer.cn>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
parent 53debe6f3c
commit 0cec062a63

15 changed files with 568 additions and 67 deletions
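One item in the commit message above, "use config partial_rotary_factor as rope ratio", refers to deriving the rotary-embedding dimension count from the Hugging Face config rather than hard-coding it; GLM-style models rotate only part of each attention head. Below is a minimal, self-contained sketch of that relationship only: the function name and the assumption that the rotary count is simply head_dim * partial_rotary_factor are illustrative, not the converter code from this commit.

#include <cstdint>
#include <cstdio>

// Hypothetical helper: map a HF-style partial_rotary_factor to a rotary
// dimension count. For illustration only; not taken from llama.cpp.
static uint32_t rope_dims(uint32_t n_embd, uint32_t n_head, float partial_rotary_factor) {
    const uint32_t head_dim = n_embd / n_head;
    return static_cast<uint32_t>(head_dim * partial_rotary_factor);
}

int main() {
    // e.g. 2048-wide embeddings, 16 heads, factor 0.5 -> 64 of 128 head dims rotated
    std::printf("%u\n", rope_dims(2048, 16, 0.5f));
    return 0;
}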
@@ -1093,8 +1093,20 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 switch (hparams.n_layer) {
-                    case 28: type = LLM_TYPE_6B; break;
-                    case 40: type = LLM_TYPE_9B; break;
+                    case 28: {
+                        if (hparams.n_head(0) == 16) {
+                            type = LLM_TYPE_1_5B;
+                        } else {
+                            type = LLM_TYPE_6B;
+                        }
+                    } break;
+                    case 40: {
+                        if (hparams.n_head(0) == 24) {
+                            type = LLM_TYPE_4B;
+                        } else {
+                            type = LLM_TYPE_9B;
+                        }
+                    } break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
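With GLM-Edge in the mix, the layer count alone no longer identifies the model: per the hunk above, a 28-layer checkpoint is treated as GLM-Edge-1.5B when the first layer has 16 heads and as 6B otherwise, and a 40-layer checkpoint as GLM-Edge-4B with 24 heads and as 9B otherwise. The sketch below restates that disambiguation as a standalone function; the name and return strings are illustrative and not part of llama.cpp.

#include <cstdint>
#include <cstdio>

// Illustrative restatement of the size disambiguation from the hunk above.
static const char * glm_variant(uint32_t n_layer, uint32_t n_head_layer0) {
    if (n_layer == 28) { return n_head_layer0 == 16 ? "GLM-Edge-1.5B" : "GLM-6B"; }
    if (n_layer == 40) { return n_head_layer0 == 24 ? "GLM-Edge-4B"   : "GLM-9B"; }
    return "unknown";
}

int main() {
    std::printf("%s\n", glm_variant(28, 16)); // prints GLM-Edge-1.5B
    std::printf("%s\n", glm_variant(40, 32)); // prints GLM-9B (any 40-layer model without 24 heads)
    return 0;
}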
@@ -3068,9 +3080,17 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         auto & layer = layers[i];

                         layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);

-                        layer.wqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, 0);
-                        layer.bqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "bias", i), {n_embd + 2*n_embd_gqa}, 0);
+                        layer.wqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                        layer.bqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "bias", i), {n_embd + 2*n_embd_gqa}, llama_model_loader::TENSOR_NOT_REQUIRED);

+                        if (layer.wqkv == nullptr) {
+                            layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0);
+                            layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_k_gqa}, 0);
+                            layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_v_gqa}, 0);
+                            layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                            layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                            layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                        }

                         layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0);
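Loading wqkv/bqkv with llama_model_loader::TENSOR_NOT_REQUIRED lets the same CHATGLM branch accept both a fused QKV tensor and checkpoints that only ship separate q/k/v projections; when the fused tensor is absent, the new block loads the individual tensors instead. The sketch below shows the general idea of branching on which layout was loaded. The struct and function here are hypothetical stand-ins, not the actual llama.cpp graph-build code, which operates on ggml tensors.

#include <cstdio>

// Hypothetical stand-in for per-layer weights; in llama.cpp these are ggml tensors.
struct layer_weights {
    const float * wqkv = nullptr; // fused QKV projection (may be absent)
    const float * wq   = nullptr; // separate projections, used when wqkv is missing
    const float * wk   = nullptr;
    const float * wv   = nullptr;
};

static void build_attention(const layer_weights & layer) {
    if (layer.wqkv != nullptr) {
        // fused path: one matmul, then split the result into Q, K and V
        std::printf("fused QKV path\n");
    } else {
        // separate path: three independent matmuls against wq, wk and wv
        std::printf("separate Q/K/V path\n");
    }
}

int main() {
    layer_weights edge_layer; // no fused tensor found in the checkpoint
    build_attention(edge_layer);
    return 0;
}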