llama : optimize DeepSeek MLA implementation

This commit is contained in:
Stanisław Szymczyk 2025-01-25 18:10:22 +01:00
parent f0ce53f158
commit de538aa329
10 changed files with 96 additions and 41 deletions

View file

@@ -586,6 +586,14 @@ class TensorNameMap:
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
),
MODEL_TENSOR.ATTN_K_B: (
"model.layers.{bid}.self_attn.k_b_proj", # deepseek2
),
MODEL_TENSOR.ATTN_V_B: (
"model.layers.{bid}.self_attn.v_b_proj", # deepseek2
),
MODEL_TENSOR.ATTN_Q_A_NORM: (
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
),