From 9f55809f7211bc58510ba501cfd681e9607cfb6a Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin <git@compilade.net>
Date: Sun, 4 Feb 2024 09:00:42 -0500
Subject: [PATCH] convert : for Mamba, also consider the "MambaLMHeadModel"
 arch name

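"MambaLMHeadModel" is the class name used by the official Mamba
implementation, although that name is not (yet) used in the "architectures"
field of its config.json.

Also remove the FIXME/TODO notes above the SSM tensor names in
gguf-py/gguf/constants.py and llama.cpp.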
---
 convert-hf-to-gguf.py     | 2 +-
 gguf-py/gguf/constants.py | 1 -
 llama.cpp                 | 2 --
 3 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index e49b2f4f6..42b0fb66e 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1844,7 +1844,7 @@ class StarCoder2Model(Model):
     model_arch = gguf.MODEL_ARCH.STARCODER2
 
 
-@Model.register("MambaForCausalLM")
+@Model.register("MambaForCausalLM", "MambaLMHeadModel")
 class MambaModel(Model):
     model_arch = gguf.MODEL_ARCH.MAMBA
 
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index a28108383..651323a1e 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -211,7 +211,6 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.FFN_DOWN_EXP:    "blk.{bid}.ffn_down.{xid}",
     MODEL_TENSOR.FFN_UP_EXP:      "blk.{bid}.ffn_up.{xid}",
     MODEL_TENSOR.LAYER_OUT_NORM:  "blk.{bid}.layer_output_norm",
-    # FIXME: NAMES FOR MAMBA ARE NOT FINAL
     MODEL_TENSOR.SSM_IN:          "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D:      "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X:           "blk.{bid}.ssm_x",
diff --git a/llama.cpp b/llama.cpp
index 466f8bc0c..37ac7425d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -401,8 +401,6 @@ enum llm_tensor {
     LLM_TENSOR_ATTN_Q_NORM,
     LLM_TENSOR_ATTN_K_NORM,
     LLM_TENSOR_LAYER_OUT_NORM,
-    // TODO: maybe use longer names?
-    // TODO: can the in_proj and/or the out_proj instead re-use some of the above types?
     LLM_TENSOR_SSM_IN,
     LLM_TENSOR_SSM_CONV1D,
     LLM_TENSOR_SSM_X,