convert : renamed expert_weights_func to expert_gating_func

This commit is contained in:
Stanisław Szymczyk 2025-01-02 12:04:58 +01:00
parent a43d4953ba
commit 93aca64520
3 changed files with 7 additions and 7 deletions

View file

@@ -3859,9 +3859,9 @@ class DeepseekV2Model(Model):
self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
if hparams["scoring_func"] == "sigmoid":
-self.gguf_writer.add_expert_weights_func(gguf.ExpertWeightsFuncType.SIGMOID)
+self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
elif hparams["scoring_func"] == "softmax":
-self.gguf_writer.add_expert_weights_func(gguf.ExpertWeightsFuncType.SOFTMAX)
+self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
else:
raise ValueError(f"Unsupported scoring_func value: {hparams['scoring_func']}")

View file

@@ -103,7 +103,7 @@ class Keys:
EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
-EXPERT_WEIGHTS_FUNC = "{arch}.expert_weights_func"
+EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
POOLING_TYPE = "{arch}.pooling_type"
LOGIT_SCALE = "{arch}.logit_scale"
DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
@@ -1581,7 +1581,7 @@ class GGMLQuantizationType(IntEnum):
TQ2_0 = 35
-class ExpertWeightsFuncType(IntEnum):
+class ExpertGatingFuncType(IntEnum):
SOFTMAX = 1
SIGMOID = 2

View file

@@ -26,7 +26,7 @@ from .constants import (
RopeScalingType,
PoolingType,
TokenType,
-ExpertWeightsFuncType,
+ExpertGatingFuncType,
)
from .quants import quant_shape_from_byte_shape
@@ -719,8 +719,8 @@ class GGUFWriter:
def add_expert_weights_norm(self, value: bool) -> None:
self.add_bool(Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=self.arch), value)
-def add_expert_weights_func(self, value: ExpertWeightsFuncType) -> None:
-    self.add_uint32(Keys.LLM.EXPERT_WEIGHTS_FUNC.format(arch=self.arch), value.value)
+def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
+    self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
def add_swin_norm(self, value: bool) -> None:
self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)