Add Command R Plus GGUF
This commit is contained in:
parent
0a1d889e27
commit
2efcd87b12
3 changed files with 7 additions and 0 deletions
|
@ -149,6 +149,7 @@ class Model(ABC):
|
|||
# map tensor names
|
||||
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
|
||||
if new_name is None:
|
||||
print(tensor_map)
|
||||
print(f"Can not map tensor {name!r}")
|
||||
sys.exit()
|
||||
|
||||
|
@ -2344,6 +2345,8 @@ class CommandR2Model(Model):
|
|||
|
||||
# max_position_embeddings = 8192 in config.json but model was actually
|
||||
# trained on 128k context length
|
||||
if "model_max_length" not in self.hparams:
|
||||
self.hparams["model_max_length"] = 131072
|
||||
self.hparams["max_position_embeddings"] = self.hparams["model_max_length"]
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
|
|
|
@ -638,6 +638,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|||
MODEL_TENSOR.FFN_GATE,
|
||||
MODEL_TENSOR.FFN_DOWN,
|
||||
MODEL_TENSOR.FFN_UP,
|
||||
MODEL_TENSOR.ATTN_K_NORM,
|
||||
MODEL_TENSOR.ATTN_Q_NORM,
|
||||
],
|
||||
# TODO
|
||||
}
|
||||
|
|
|
@ -285,12 +285,14 @@ class TensorNameMap:
|
|||
MODEL_TENSOR.ATTN_Q_NORM: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
||||
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
||||
"model.layers.{bid}.self_attn.q_norm", # cohere
|
||||
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_K_NORM: (
|
||||
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
||||
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
||||
"model.layers.{bid}.self_attn.k_norm", # cohere
|
||||
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
||||
),
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue