Add Command R Plus GGUF
This commit is contained in:
parent
0a1d889e27
commit
2efcd87b12
3 changed files with 7 additions and 0 deletions
|
@ -149,6 +149,7 @@ class Model(ABC):
|
||||||
# map tensor names
|
# map tensor names
|
||||||
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
|
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
|
||||||
if new_name is None:
|
if new_name is None:
|
||||||
|
print(tensor_map)
|
||||||
print(f"Can not map tensor {name!r}")
|
print(f"Can not map tensor {name!r}")
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
|
@ -2344,6 +2345,8 @@ class CommandR2Model(Model):
|
||||||
|
|
||||||
# max_position_embeddings = 8192 in config.json but model was actually
|
# max_position_embeddings = 8192 in config.json but model was actually
|
||||||
# trained on 128k context length
|
# trained on 128k context length
|
||||||
|
if "model_max_length" not in self.hparams:
|
||||||
|
self.hparams["model_max_length"] = 131072
|
||||||
self.hparams["max_position_embeddings"] = self.hparams["model_max_length"]
|
self.hparams["max_position_embeddings"] = self.hparams["model_max_length"]
|
||||||
|
|
||||||
def set_gguf_parameters(self):
|
def set_gguf_parameters(self):
|
||||||
|
|
|
@ -638,6 +638,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
MODEL_TENSOR.FFN_GATE,
|
MODEL_TENSOR.FFN_GATE,
|
||||||
MODEL_TENSOR.FFN_DOWN,
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
MODEL_TENSOR.FFN_UP,
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
MODEL_TENSOR.ATTN_K_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q_NORM,
|
||||||
],
|
],
|
||||||
# TODO
|
# TODO
|
||||||
}
|
}
|
||||||
|
|
|
@ -285,12 +285,14 @@ class TensorNameMap:
|
||||||
MODEL_TENSOR.ATTN_Q_NORM: (
|
MODEL_TENSOR.ATTN_Q_NORM: (
|
||||||
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
||||||
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
||||||
|
"model.layers.{bid}.self_attn.q_norm", # cohere
|
||||||
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.ATTN_K_NORM: (
|
MODEL_TENSOR.ATTN_K_NORM: (
|
||||||
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
||||||
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
||||||
|
"model.layers.{bid}.self_attn.k_norm", # cohere
|
||||||
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
||||||
),
|
),
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue