gguf-py, llama : rename expert_weights to exp_probs in tensor and variable names

2025-01-03 13:51:14 +01:00 · 2025-01-03 13:51:14 +01:00 · 140eb29264
commit 140eb29264
parent d2f784d50d
3 changed files with 13 additions and 13 deletions
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -314,7 +314,7 @@ class MODEL_TENSOR(IntEnum):
    FFN_GATE_SHEXP       = auto()
    FFN_DOWN_SHEXP       = auto()
    FFN_UP_SHEXP         = auto()
-    FFN_EXPERT_WEIGHTS_B = auto()
+    FFN_EXP_PROBS_B      = auto()
    ATTN_Q_NORM          = auto()
    ATTN_K_NORM          = auto()
    LAYER_OUT_NORM       = auto()
@@ -499,7 +499,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
    MODEL_TENSOR.FFN_GATE_EXP:              "blk.{bid}.ffn_gate_exps",
    MODEL_TENSOR.FFN_DOWN_EXP:              "blk.{bid}.ffn_down_exps",
    MODEL_TENSOR.FFN_UP_EXP:                "blk.{bid}.ffn_up_exps",
-    MODEL_TENSOR.FFN_EXPERT_WEIGHTS_B:      "blk.{bid}.expert_weights_b",
+    MODEL_TENSOR.FFN_EXP_PROBS_B:           "blk.{bid}.exp_probs_b",
    MODEL_TENSOR.LAYER_OUT_NORM:            "blk.{bid}.layer_output_norm",
    MODEL_TENSOR.SSM_IN:                    "blk.{bid}.ssm_in",
    MODEL_TENSOR.SSM_CONV1D:                "blk.{bid}.ssm_conv1d",
@@ -1280,7 +1280,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_GATE_SHEXP,
        MODEL_TENSOR.FFN_DOWN_SHEXP,
        MODEL_TENSOR.FFN_UP_SHEXP,
-        MODEL_TENSOR.FFN_EXPERT_WEIGHTS_B,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
    ],
    MODEL_ARCH.CHATGLM : [
        MODEL_TENSOR.TOKEN_EMBD,
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -276,7 +276,7 @@ class TensorNameMap:
            "model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
        ),
-        MODEL_TENSOR.FFN_EXPERT_WEIGHTS_B: (
+        MODEL_TENSOR.FFN_EXP_PROBS_B: (
            "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
        ),
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -564,7 +564,7 @@ enum llm_tensor {
    LLM_TENSOR_FFN_DOWN_SHEXP,
    LLM_TENSOR_FFN_GATE_SHEXP,
    LLM_TENSOR_FFN_UP_SHEXP,
-    LLM_TENSOR_FFN_EXPERT_WEIGHTS_B,
+    LLM_TENSOR_FFN_EXP_PROBS_B,
    LLM_TENSOR_ATTN_Q_NORM,
    LLM_TENSOR_ATTN_K_NORM,
    LLM_TENSOR_LAYER_OUT_NORM,
@@ -1434,7 +1434,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
            { LLM_TENSOR_FFN_GATE_SHEXP,     "blk.%d.ffn_gate_shexp" },
            { LLM_TENSOR_FFN_DOWN_SHEXP,     "blk.%d.ffn_down_shexp" },
            { LLM_TENSOR_FFN_UP_SHEXP,       "blk.%d.ffn_up_shexp" },
-            { LLM_TENSOR_FFN_EXPERT_WEIGHTS_B, "blk.%d.expert_weights_b" },
+            { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
        },
    },
    {
@@ -2934,7 +2934,7 @@ struct llama_layer {
    struct ggml_tensor * ffn_down_b = nullptr; // b2
    struct ggml_tensor * ffn_up_b   = nullptr; // b3
    struct ggml_tensor * ffn_act    = nullptr;
-    struct ggml_tensor * ffn_expert_weights_bias = nullptr;
+    struct ggml_tensor * ffn_exp_probs_b = nullptr;
    // mamba proj
    struct ggml_tensor * ssm_in  = nullptr;
@@ -7480,7 +7480,7 @@ static const std::map<llm_tensor, llm_tensor_info> llm_tensor_info_mapping = {
    {LLM_TENSOR_FFN_DOWN_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_GATE_EXPS,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_UP_EXPS,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
-    {LLM_TENSOR_FFN_EXPERT_WEIGHTS_B,       {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+    {LLM_TENSOR_FFN_EXP_PROBS_B,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
    // this tensor is loaded for T5, but never used
    {LLM_TENSOR_DEC_CROSS_ATTN_REL_B,       {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
    {LLM_TENSOR_CONV1D,                     {LLM_TENSOR_LAYER_INPUT,     GGML_OP_IM2COL}},
@@ -9283,7 +9283,7 @@ static bool llm_load_tensors(
                            layer.ffn_up   = create_tensor(tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd,   n_ff}, 0);
                        } else {
                            layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert}, 0);
-                            layer.ffn_expert_weights_bias = create_tensor(tn(LLM_TENSOR_FFN_EXPERT_WEIGHTS_B, "bias", i), {n_expert}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                            layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias", i), {n_expert}, llama_model_loader::TENSOR_NOT_REQUIRED);
                            if (n_expert == 0) {
                                throw std::runtime_error("n_expert must be > 0");
@@ -10285,22 +10285,22 @@ llm_expert_gating_func_type   gating_op,
        case LLM_EXPERT_GATING_FUNC_SOFTMAX:
            {
                probs = ggml_soft_max(ctx, logits); // [n_expert, n_tokens]
                cb(probs, "ffn_moe_probs", il);
            } break;
        case LLM_EXPERT_GATING_FUNC_SIGMOID:
            {
                probs = ggml_sigmoid(ctx, logits); // [n_expert, n_tokens]
                cb(probs, "ffn_moe_sigm", il);
            } break;
        default:
            GGML_ABORT("fatal error");
    }
    cb(probs, "ffn_moe_probs", il);
    // add experts selection bias - introduced in DeepSeek V3
    // leave probs unbiased as it's later used to get expert weights
    ggml_tensor * selection_probs = probs;
    if (expert_weights_b != nullptr) {
        selection_probs = ggml_add(ctx, probs, expert_weights_b);
-        cb(selection_probs, "ffn_moe_sigm_biased", il);
+        cb(selection_probs, "ffn_moe_probs_biased", il);
    }
    // select experts
@@ -16241,7 +16241,7 @@ struct llm_build_context {
                            model.layers[il].ffn_up_exps,
                            model.layers[il].ffn_gate_exps,
                            model.layers[il].ffn_down_exps,
-                            model.layers[il].ffn_expert_weights_bias,
+                            model.layers[il].ffn_exp_probs_b,
                            n_expert, n_expert_used,
                            LLM_FFN_SILU, hparams.expert_weights_norm,
                            true, hparams.expert_weights_scale,