Add Command R Plus GGUF

2024-04-04 18:23:23 +01:00 · 2024-04-04 18:23:23 +01:00 · 2efcd87b12
commit 2efcd87b12
parent 0a1d889e27
3 changed files with 7 additions and 0 deletions
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -149,6 +149,7 @@ class Model(ABC):
            # map tensor names
            new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
            if new_name is None:
+                print(tensor_map)
                print(f"Can not map tensor {name!r}")
                sys.exit()

@@ -2344,6 +2345,8 @@ class CommandR2Model(Model):

        # max_position_embeddings = 8192 in config.json but model was actually
        # trained on 128k context length
+        if "model_max_length" not in self.hparams:
+            self.hparams["model_max_length"] =  131072
        self.hparams["max_position_embeddings"] = self.hparams["model_max_length"]

    def set_gguf_parameters(self):
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -638,6 +638,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_GATE,
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.ATTN_K_NORM,
+	    MODEL_TENSOR.ATTN_Q_NORM,
    ],
    # TODO
 }
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -285,12 +285,14 @@ class TensorNameMap:
        MODEL_TENSOR.ATTN_Q_NORM: (
            "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
            "model.layers.{bid}.self_attn.q_layernorm",                       # persimmon
+            "model.layers.{bid}.self_attn.q_norm",                            # cohere
            "transformer.blocks.{bid}.attn.q_ln",                             # sea-lion
        ),

        MODEL_TENSOR.ATTN_K_NORM: (
            "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
            "model.layers.{bid}.self_attn.k_layernorm",                       # persimmon
+            "model.layers.{bid}.self_attn.k_norm",                            # cohere
            "transformer.blocks.{bid}.attn.k_ln",                             # sea-lion
        ),