llama : optimize DeepSeek MLA implementation
This commit is contained in:
parent
f0ce53f158
commit
de538aa329
10 changed files with 96 additions and 41 deletions
|
@@ -586,6 +586,14 @@ class TensorNameMap:
|
|||
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_K_B: (
|
||||
"model.layers.{bid}.self_attn.k_b_proj", # deepseek2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_V_B: (
|
||||
"model.layers.{bid}.self_attn.v_b_proj", # deepseek2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_Q_A_NORM: (
|
||||
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
|
||||
),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue