From 43c679507f20b435219d89df253402e57fd70067 Mon Sep 17 00:00:00 2001 From: Valentin Mamedov Date: Sun, 15 Dec 2024 13:53:42 +0700 Subject: [PATCH] fix order of deepseek and deepseek2 in constants; mark shared exp as deepseek arch need --- gguf-py/gguf/constants.py | 4 ++-- gguf-py/gguf/tensor_mapping.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index e5bf1a16e..18af76d4a 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -247,8 +247,8 @@ class MODEL_ARCH(IntEnum): OLMOE = auto() OPENELM = auto() ARCTIC = auto() - DEEPSEEK2 = auto() DEEPSEEK = auto() + DEEPSEEK2 = auto() CHATGLM = auto() BITNET = auto() T5 = auto() @@ -410,8 +410,8 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = { MODEL_ARCH.OLMOE: "olmoe", MODEL_ARCH.OPENELM: "openelm", MODEL_ARCH.ARCTIC: "arctic", - MODEL_ARCH.DEEPSEEK2: "deepseek2", MODEL_ARCH.DEEPSEEK: "deepseek", + MODEL_ARCH.DEEPSEEK2: "deepseek2", MODEL_ARCH.CHATGLM: "chatglm", MODEL_ARCH.BITNET: "bitnet", MODEL_ARCH.T5: "t5", diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index f0a7b6478..573d0282e 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -306,7 +306,7 @@ class TensorNameMap: MODEL_TENSOR.FFN_UP_SHEXP: ( "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe - "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2 + "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2 ), # AWQ-activation gate @@ -338,7 +338,7 @@ class TensorNameMap: MODEL_TENSOR.FFN_GATE_SHEXP: ( "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe - "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2 + "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2 ), # Feed-forward down @@ -379,7 +379,7 @@ class TensorNameMap: MODEL_TENSOR.FFN_DOWN_SHEXP: ( "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe - "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2 + "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2 ), MODEL_TENSOR.ATTN_Q_NORM: (