diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 7e601170e..6b7ea1dfd 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -149,6 +149,7 @@ class Model(ABC): # map tensor names new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: + print(tensor_map) print(f"Can not map tensor {name!r}") sys.exit() @@ -2344,6 +2345,8 @@ class CommandR2Model(Model): # max_position_embeddings = 8192 in config.json but model was actually # trained on 128k context length + if "model_max_length" not in self.hparams: + self.hparams["model_max_length"] = 131072 self.hparams["max_position_embeddings"] = self.hparams["model_max_length"] def set_gguf_parameters(self): diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 5214764a9..9ed7bbe18 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -638,6 +638,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_TENSOR.FFN_GATE, MODEL_TENSOR.FFN_DOWN, MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.ATTN_K_NORM, + MODEL_TENSOR.ATTN_Q_NORM, ], # TODO } diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index 345b1b0c7..4f02d298e 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -285,12 +285,14 @@ class TensorNameMap: MODEL_TENSOR.ATTN_Q_NORM: ( "language_model.encoder.layers.{bid}.self_attention.q_layernorm", "model.layers.{bid}.self_attn.q_layernorm", # persimmon + "model.layers.{bid}.self_attn.q_norm", # cohere "transformer.blocks.{bid}.attn.q_ln", # sea-lion ), MODEL_TENSOR.ATTN_K_NORM: ( "language_model.encoder.layers.{bid}.self_attention.k_layernorm", "model.layers.{bid}.self_attn.k_layernorm", # persimmon + "model.layers.{bid}.self_attn.k_norm", # cohere "transformer.blocks.{bid}.attn.k_ln", # sea-lion ),