diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index d3e8ec1f6..b05952116 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -249,6 +249,8 @@ class Model: return gguf.MODEL_ARCH.FALCON if arch == "GPTBigCodeForCausalLM": return gguf.MODEL_ARCH.STARCODER + if arch == "Starcoder2ForCausalLM": + return gguf.MODEL_ARCH.LLAMA if arch == "GPTRefactForCausalLM": return gguf.MODEL_ARCH.REFACT if arch == "PersimmonForCausalLM": diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index 861003776..db2ec9704 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -210,6 +210,7 @@ class TensorNameMap: "model.layers.layers.{bid}.mlp.up_proj", # plamo "model.layers.{bid}.feed_forward.w3", # internlm2 "encoder.layers.{bid}.mlp.fc11", # nomic-bert + "model.layers.{bid}.mlp.c_fc", # starcoder2 ), MODEL_TENSOR.FFN_UP_EXP: ( @@ -256,6 +257,7 @@ class TensorNameMap: "model.layers.layers.{bid}.mlp.down_proj", # plamo "model.layers.{bid}.feed_forward.w2", # internlm2 "encoder.layers.{bid}.mlp.fc2", # nomic-bert + "model.layers.{bid}.mlp.c_proj", # starcoder2 ), MODEL_TENSOR.FFN_DOWN_EXP: (