convert : renamed expert_weights_func to expert_gating_func

This commit is contained in:
Stanisław Szymczyk 2025-01-02 12:04:58 +01:00
parent a43d4953ba
commit 93aca64520
3 changed files with 7 additions and 7 deletions

View file

@@ -3859,9 +3859,9 @@ class DeepseekV2Model(Model):
self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])
if hparams["scoring_func"] == "sigmoid":
-self.gguf_writer.add_expert_weights_func(gguf.ExpertWeightsFuncType.SIGMOID)
+self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
elif hparams["scoring_func"] == "softmax":
-self.gguf_writer.add_expert_weights_func(gguf.ExpertWeightsFuncType.SOFTMAX)
+self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
else:
raise ValueError(f"Unsupported scoring_func value: {hparams['scoring_func']}")

View file

@@ -103,7 +103,7 @@ class Keys:
EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
-EXPERT_WEIGHTS_FUNC = "{arch}.expert_weights_func"
+EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
POOLING_TYPE = "{arch}.pooling_type"
LOGIT_SCALE = "{arch}.logit_scale"
DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
@@ -1581,7 +1581,7 @@ class GGMLQuantizationType(IntEnum):
TQ2_0 = 35
-class ExpertWeightsFuncType(IntEnum):
+class ExpertGatingFuncType(IntEnum):
SOFTMAX = 1
SIGMOID = 2

View file

@@ -26,7 +26,7 @@ from .constants import (
RopeScalingType,
PoolingType,
TokenType,
-ExpertWeightsFuncType,
+ExpertGatingFuncType,
)
from .quants import quant_shape_from_byte_shape
@@ -719,8 +719,8 @@ class GGUFWriter:
def add_expert_weights_norm(self, value: bool) -> None:
self.add_bool(Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=self.arch), value)
-def add_expert_weights_func(self, value: ExpertWeightsFuncType) -> None:
-    self.add_uint32(Keys.LLM.EXPERT_WEIGHTS_FUNC.format(arch=self.arch), value.value)
+def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
+    self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
def add_swin_norm(self, value: bool) -> None:
self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)