convert : for Mamba, also consider the "MambaLMHeadModel" arch name
It's the name of the class of the official implementation, though they don't use it (yet) in the "architectures" field of config.json
This commit is contained in:
parent
a3f4a1c7dc
commit
9f55809f72
3 changed files with 1 additions and 4 deletions
|
@ -1844,7 +1844,7 @@ class StarCoder2Model(Model):
|
||||||
model_arch = gguf.MODEL_ARCH.STARCODER2
|
model_arch = gguf.MODEL_ARCH.STARCODER2
|
||||||
|
|
||||||
|
|
||||||
@Model.register("MambaForCausalLM")
|
@Model.register("MambaForCausalLM", "MambaLMHeadModel")
|
||||||
class MambaModel(Model):
|
class MambaModel(Model):
|
||||||
model_arch = gguf.MODEL_ARCH.MAMBA
|
model_arch = gguf.MODEL_ARCH.MAMBA
|
||||||
|
|
||||||
|
|
|
@ -211,7 +211,6 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||||
MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down.{xid}",
|
MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down.{xid}",
|
||||||
MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up.{xid}",
|
MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up.{xid}",
|
||||||
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
|
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
|
||||||
# FIXME: NAMES FOR MAMBA ARE NOT FINAL
|
|
||||||
MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
|
MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
|
||||||
MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
|
MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
|
||||||
MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
|
MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
|
||||||
|
|
|
@ -401,8 +401,6 @@ enum llm_tensor {
|
||||||
LLM_TENSOR_ATTN_Q_NORM,
|
LLM_TENSOR_ATTN_Q_NORM,
|
||||||
LLM_TENSOR_ATTN_K_NORM,
|
LLM_TENSOR_ATTN_K_NORM,
|
||||||
LLM_TENSOR_LAYER_OUT_NORM,
|
LLM_TENSOR_LAYER_OUT_NORM,
|
||||||
// TODO: maybe use longer names?
|
|
||||||
// TODO: can the in_proj and/or the out_proj instead re-use some of the above types?
|
|
||||||
LLM_TENSOR_SSM_IN,
|
LLM_TENSOR_SSM_IN,
|
||||||
LLM_TENSOR_SSM_CONV1D,
|
LLM_TENSOR_SSM_CONV1D,
|
||||||
LLM_TENSOR_SSM_X,
|
LLM_TENSOR_SSM_X,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue