convert: rename expert_weights_func to expert_gating_func

This commit is contained in:
Stanisław Szymczyk 2025-01-02 12:04:58 +01:00
parent a43d4953ba
commit 93aca64520
3 changed files with 7 additions and 7 deletions

View file

@@ -3859,9 +3859,9 @@ class DeepseekV2Model(Model):
             self.gguf_writer.add_expert_weights_norm(hparams["norm_topk_prob"])

             if hparams["scoring_func"] == "sigmoid":
-                self.gguf_writer.add_expert_weights_func(gguf.ExpertWeightsFuncType.SIGMOID)
+                self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
             elif hparams["scoring_func"] == "softmax":
-                self.gguf_writer.add_expert_weights_func(gguf.ExpertWeightsFuncType.SOFTMAX)
+                self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SOFTMAX)
             else:
                 raise ValueError(f"Unsupported scoring_func value: {hparams['scoring_func']}")

View file

@@ -103,7 +103,7 @@ class Keys:
         EXPERT_SHARED_COUNT    = "{arch}.expert_shared_count"
         EXPERT_WEIGHTS_SCALE   = "{arch}.expert_weights_scale"
         EXPERT_WEIGHTS_NORM    = "{arch}.expert_weights_norm"
-        EXPERT_WEIGHTS_FUNC    = "{arch}.expert_weights_func"
+        EXPERT_GATING_FUNC     = "{arch}.expert_gating_func"
         POOLING_TYPE           = "{arch}.pooling_type"
         LOGIT_SCALE            = "{arch}.logit_scale"
         DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
@@ -1581,7 +1581,7 @@ class GGMLQuantizationType(IntEnum):
     TQ2_0 = 35

-class ExpertWeightsFuncType(IntEnum):
+class ExpertGatingFuncType(IntEnum):
     SOFTMAX = 1
     SIGMOID = 2

View file

@@ -26,7 +26,7 @@ from .constants import (
     RopeScalingType,
     PoolingType,
     TokenType,
-    ExpertWeightsFuncType,
+    ExpertGatingFuncType,
 )

 from .quants import quant_shape_from_byte_shape
@@ -719,8 +719,8 @@ class GGUFWriter:
     def add_expert_weights_norm(self, value: bool) -> None:
         self.add_bool(Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=self.arch), value)

-    def add_expert_weights_func(self, value: ExpertWeightsFuncType) -> None:
-        self.add_uint32(Keys.LLM.EXPERT_WEIGHTS_FUNC.format(arch=self.arch), value.value)
+    def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
+        self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)

     def add_swin_norm(self, value: bool) -> None:
         self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)