llama : optimize DeepSeek MLA implementation

2025-01-25 18:10:22 +01:00 · 2025-01-25 18:10:22 +01:00 · de538aa329
commit de538aa329
parent f0ce53f158
10 changed files with 96 additions and 41 deletions
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@ -586,6 +586,14 @@ class TensorNameMap:
            "model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
        ),

+        MODEL_TENSOR.ATTN_K_B: (
+            "model.layers.{bid}.self_attn.k_b_proj",  # deepseek2
+        ),
+
+        MODEL_TENSOR.ATTN_V_B: (
+            "model.layers.{bid}.self_attn.v_b_proj",  # deepseek2
+        ),
+
        MODEL_TENSOR.ATTN_Q_A_NORM: (
            "model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
        ),