Fix the order of DEEPSEEK and DEEPSEEK2 in constants; mark the shared-expert tensor mappings as also used by the deepseek arch

This commit is contained in:
Valentin Mamedov 2024-12-15 13:53:42 +07:00
parent b32159c8a7
commit 43c679507f
2 changed files with 5 additions and 5 deletions

View file

@ -247,8 +247,8 @@ class MODEL_ARCH(IntEnum):
OLMOE = auto()
OPENELM = auto()
ARCTIC = auto()
DEEPSEEK2 = auto()
DEEPSEEK = auto()
DEEPSEEK2 = auto()
CHATGLM = auto()
BITNET = auto()
T5 = auto()
@ -410,8 +410,8 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.OLMOE: "olmoe",
MODEL_ARCH.OPENELM: "openelm",
MODEL_ARCH.ARCTIC: "arctic",
MODEL_ARCH.DEEPSEEK2: "deepseek2",
MODEL_ARCH.DEEPSEEK: "deepseek",
MODEL_ARCH.DEEPSEEK2: "deepseek2",
MODEL_ARCH.CHATGLM: "chatglm",
MODEL_ARCH.BITNET: "bitnet",
MODEL_ARCH.T5: "t5",

View file

@ -306,7 +306,7 @@ class TensorNameMap:
MODEL_TENSOR.FFN_UP_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
),
# AWQ-activation gate
@ -338,7 +338,7 @@ class TensorNameMap:
MODEL_TENSOR.FFN_GATE_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
),
# Feed-forward down
@ -379,7 +379,7 @@ class TensorNameMap:
MODEL_TENSOR.FFN_DOWN_SHEXP: (
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
),
MODEL_TENSOR.ATTN_Q_NORM: (