compatible with old and new minicpm versions

commit 9ecc666b94 (parent e913ac9c38)
4 changed files with 19 additions and 2 deletions
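Background: with a tied LM head the output projection reuses the token-embedding matrix instead of carrying its own weights. Older MiniCPM checkpoints always tie the head; newer ones may ship a separate lm_head, which is what this commit makes configurable. A rough sense of what tying saves (the sizes below are assumed MiniCPM-2B-like values, for illustration only, not taken from this commit):

    # Assumed sizes, for illustration only.
    n_vocab, n_embd = 122753, 2304
    shared = n_vocab * n_embd  # ~283M weights shared between embedding and head
    print(f"{shared / 1e6:.0f}M parameters not duplicated when tie_lm_head is true")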
convert-hf-to-gguf.py

@@ -1097,6 +1097,7 @@ class MiniCPMModel(Model):
         self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_file_type(self.ftype)
+        self.gguf_writer.add_tie_lm_head(self.hparams["tie_lm_head"])
 
     def set_vocab(self):
         self._set_vocab_hf()
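The converter reads the flag straight out of the checkpoint's hparams. If a config predates the "tie_lm_head" field, a defensive variant (my assumption, not what the diff does) would fall back to tied weights, matching the runtime default added below in llama.cpp:

    # Sketch only: tolerate MiniCPM configs that omit "tie_lm_head".
    def write_tie_lm_head(gguf_writer, hparams: dict) -> None:
        # Absence of the key is treated as a tied head, the historical behaviour.
        gguf_writer.add_tie_lm_head(hparams.get("tie_lm_head", True))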
gguf-py/gguf/constants.py

@@ -43,6 +43,7 @@ class Keys:
        EXPERT_USED_COUNT = "{arch}.expert_used_count"
        POOLING_TYPE = "{arch}.pooling_type"
        LOGIT_SCALE = "{arch}.logit_scale"
+       TIE_LM_HEAD = "{arch}.tie_lm_head"
 
    class Attention:
        HEAD_COUNT = "{arch}.attention.head_count"
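Each key is a format-string template keyed by architecture; for MiniCPM the new key expands like this:

    TIE_LM_HEAD = "{arch}.tie_lm_head"
    print(TIE_LM_HEAD.format(arch="minicpm"))  # -> minicpm.tie_lm_head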
@@ -805,6 +806,7 @@ KEY_BLOCK_COUNT = Keys.LLM.BLOCK_COUNT
 KEY_FEED_FORWARD_LENGTH = Keys.LLM.FEED_FORWARD_LENGTH
 KEY_USE_PARALLEL_RESIDUAL = Keys.LLM.USE_PARALLEL_RESIDUAL
 KEY_TENSOR_DATA_LAYOUT = Keys.LLM.TENSOR_DATA_LAYOUT
+KEY_TIE_LM_HEAD = Keys.LLM.TIE_LM_HEAD
 
 # attention
 KEY_ATTENTION_HEAD_COUNT = Keys.Attention.HEAD_COUNT
gguf-py/gguf/gguf_writer.py

@@ -343,6 +343,9 @@ class GGUFWriter:
     def add_parallel_residual(self, use: bool) -> None:
         self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)
 
+    def add_tie_lm_head(self, tie_lm_head: bool) -> None:
+        self.add_bool(Keys.LLM.TIE_LM_HEAD.format(arch=self.arch), tie_lm_head)
+
     def add_head_count(self, count: int) -> None:
         self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)
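In a conversion script the new method is used like any other metadata setter. A minimal sketch, assuming the gguf-py API of this tree and a hypothetical output path:

    import gguf

    writer = gguf.GGUFWriter("minicpm.gguf", arch="minicpm")  # hypothetical path
    writer.add_tie_lm_head(True)  # stored as a bool under "minicpm.tie_lm_head"
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.close()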
llama.cpp (11 changes)
@@ -276,6 +276,7 @@ enum llm_kv {
     LLM_KV_EXPERT_USED_COUNT,
     LLM_KV_POOLING_TYPE,
     LLM_KV_LOGIT_SCALE,
+    LLM_KV_TIE_LM_HEAD,
 
     LLM_KV_ATTENTION_HEAD_COUNT,
     LLM_KV_ATTENTION_HEAD_COUNT_KV,
@@ -320,6 +321,7 @@ enum llm_kv {
     LLM_KV_TOKENIZER_ADD_PREFIX,
     LLM_KV_TOKENIZER_HF_JSON,
     LLM_KV_TOKENIZER_RWKV,
 };
 
 static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
@@ -345,6 +347,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EXPERT_USED_COUNT, "%s.expert_used_count" },
     { LLM_KV_POOLING_TYPE, "%s.pooling_type" },
     { LLM_KV_LOGIT_SCALE, "%s.logit_scale" },
+    { LLM_KV_TIE_LM_HEAD, "%s.tie_lm_head" },
 
     { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
     { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
@@ -1707,6 +1710,7 @@ struct llama_hparams {
 
     bool causal_attn = true;
     bool need_kq_pos = false;
+    bool tie_lm_head = true;
 
     enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
     enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE;
@@ -3503,6 +3507,7 @@ static void llm_load_hparams(
     ml.get_key(LLM_KV_BLOCK_COUNT, hparams.n_layer);
     ml.get_key(LLM_KV_EXPERT_COUNT, hparams.n_expert, false);
     ml.get_key(LLM_KV_EXPERT_USED_COUNT, hparams.n_expert_used, false);
+    ml.get_key(LLM_KV_TIE_LM_HEAD, hparams.tie_lm_head, false);
 
     GGML_ASSERT(hparams.n_expert <= LLAMA_MAX_EXPERTS);
     GGML_ASSERT(hparams.n_expert_used <= hparams.n_expert);
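The final false argument marks the key as optional: old GGUF files that lack minicpm.tie_lm_head simply keep the default of true declared in llama_hparams, which is what preserves compatibility with existing MiniCPM conversions. The same fallback expressed in Python terms (hypothetical metadata dicts):

    def get_key(metadata: dict, key: str, default):
        # Optional read: an absent key keeps the caller's default.
        return metadata.get(key, default)

    old_file = {}                              # pre-dates the key -> tied head
    new_file = {"minicpm.tie_lm_head": False}  # untied checkpoint
    assert get_key(old_file, "minicpm.tie_lm_head", True) is True
    assert get_key(new_file, "minicpm.tie_lm_head", True) is False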
@@ -4375,7 +4380,9 @@ static bool llm_load_tensors(
         case LLM_ARCH_MINICPM:
             {
                 model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
+                if (!hparams.tie_lm_head){
+                    model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, false);
+                }
 
                 // output
                 {
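When the head is tied, no separate output tensor is materialized at load time; the graph below falls back to tok_embd instead (and the trailing false on create_tensor marks the tensor as non-required). The loading decision in Python pseudocode, using the conventional GGUF tensor names:

    def resolve_output_weight(tensors: dict, tie_lm_head: bool):
        # Tied head: reuse the token-embedding matrix, nothing extra to load.
        if tie_lm_head:
            return tensors["token_embd.weight"]
        # Untied head: a dedicated "output.weight" tensor must be present.
        return tensors["output.weight"]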
@@ -8700,7 +8707,11 @@ struct llm_build_context {
         cb(cur, "lmhead_scaling", -1);
 
         // lm_head
-        cur = ggml_mul_mat(ctx0, model.tok_embd, cur);
+        if (hparams.tie_lm_head){
+            cur = ggml_mul_mat(ctx0, model.tok_embd, cur);
+        }else{
+            cur = ggml_mul_mat(ctx0, model.output, cur);
+        }
         cb(cur, "result_output", -1);
 
         ggml_build_forward_expand(gf, cur);
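At graph-build time the only difference between the two paths is which matrix multiplies the final hidden state. The same computation in numpy terms (toy shapes; ggml's mul_mat layout convention is glossed over here):

    import numpy as np

    n_vocab, n_embd = 8, 4                       # toy sizes
    tok_embd = np.random.randn(n_vocab, n_embd)  # embedding matrix, always present
    output_w = np.random.randn(n_vocab, n_embd)  # exists only for untied models
    hidden = np.random.randn(n_embd)             # final hidden state

    tie_lm_head = True
    logits = (tok_embd if tie_lm_head else output_w) @ hidden  # shape (n_vocab,)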