Loading works up to LayerNorm2D

This commit is contained in:
S 2024-04-04 22:30:17 +01:00 committed by S
parent fbab98497b
commit e4b2e2d339
2 changed files with 25 additions and 3 deletions

View file

@ -2344,8 +2344,7 @@ class CommandR2Model(Model):
# max_position_embeddings = 8192 in config.json but model was actually
# trained on 128k context length
if "model_max_length" not in self.hparams:
self.hparams["model_max_length"] = 131072
self.hparams["max_position_embeddings"] = self.hparams["model_max_length"]
def set_gguf_parameters(self):

View file

@ -924,6 +924,8 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
},
},
{
@ -5404,6 +5406,12 @@ static bool llm_load_tensors(
layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
if(n_layer >= 64)
{
layer.attn_q_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {hparams.n_embd_head_k, hparams.n_head});
layer.attn_k_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {hparams.n_embd_head_k, hparams.n_head_kv});
}
layer.wq = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd});
layer.wk = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa});
layer.wv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa});
@ -9452,6 +9460,21 @@ struct llm_build_context {
cb(Vcur, "Vcur", il);
}
if(model.layers[il].attn_q_norm)
{
Qcur = llm_build_norm(ctx0, Qcur, hparams,
model.layers[il].attn_q_norm,
NULL,
LLM_NORM, cb, il);
cb(Qcur, "Qcur", il);
Kcur = llm_build_norm(ctx0, Kcur, hparams,
model.layers[il].attn_k_norm,
NULL,
LLM_NORM, cb, il);
cb(Kcur, "Kcur", il);
}
Qcur = ggml_rope_custom(
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,