llama : Add support for DeepSeek V3 (#11049)
* convert : extend DEEPSEEK2 model architecture to support DeepseekV3ForCausalLM by adding EXPERT_WEIGHTS_NORM and EXPERT_GATING_FUNC model parameters and FFN_EXP_PROBS_B tensor type * vocab : add DeepSeek V3 pre-tokenizer regexes * unicode : handle ACCENT_MARK and SYMBOL categories in regex * llama : add DeepSeek V3 chat template, handle new model parameters and tensor types --------- Co-authored-by: Stanisław Szymczyk <sszymczy@gmail.com>
This commit is contained in:
parent
f922a9c542
commit
9394bbd484
16 changed files with 162 additions and 5 deletions
|
@ -96,6 +96,8 @@ enum llm_kv {
|
|||
LLM_KV_EXPERT_USED_COUNT,
|
||||
LLM_KV_EXPERT_SHARED_COUNT,
|
||||
LLM_KV_EXPERT_WEIGHTS_SCALE,
|
||||
LLM_KV_EXPERT_WEIGHTS_NORM,
|
||||
LLM_KV_EXPERT_GATING_FUNC,
|
||||
LLM_KV_POOLING_TYPE,
|
||||
LLM_KV_LOGIT_SCALE,
|
||||
LLM_KV_DECODER_START_TOKEN_ID,
|
||||
|
@ -231,6 +233,7 @@ enum llm_tensor {
|
|||
LLM_TENSOR_FFN_DOWN_SHEXP,
|
||||
LLM_TENSOR_FFN_GATE_SHEXP,
|
||||
LLM_TENSOR_FFN_UP_SHEXP,
|
||||
LLM_TENSOR_FFN_EXP_PROBS_B,
|
||||
LLM_TENSOR_ATTN_Q_NORM,
|
||||
LLM_TENSOR_ATTN_K_NORM,
|
||||
LLM_TENSOR_LAYER_OUT_NORM,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue