Update convert-hf-to-gguf.py, constants, and tensor mappings
This commit is contained in:
parent
4b91f65549
commit
5bf560b282
3 changed files with 74 additions and 0 deletions
|
@ -2336,6 +2336,43 @@ class GemmaModel(Model):
|
||||||
|
|
||||||
return [(self.map_tensor_name(name), data_torch)]
|
return [(self.map_tensor_name(name), data_torch)]
|
||||||
|
|
||||||
|
@Model.register("Gemma2ForCausalLM")
|
||||||
|
class Gemma2Model(Model):
|
||||||
|
model_arch = gguf.MODEL_ARCH.GEMMA2
|
||||||
|
|
||||||
|
def set_vocab(self):
|
||||||
|
self._set_vocab_sentencepiece()
|
||||||
|
|
||||||
|
def set_gguf_parameters(self):
|
||||||
|
hparams = self.hparams
|
||||||
|
block_count = hparams["num_hidden_layers"]
|
||||||
|
|
||||||
|
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
|
||||||
|
self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
|
||||||
|
self.gguf_writer.add_embedding_length(hparams["hidden_size"])
|
||||||
|
self.gguf_writer.add_block_count(block_count)
|
||||||
|
self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
|
||||||
|
self.gguf_writer.add_head_count(hparams["num_attention_heads"])
|
||||||
|
self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
|
||||||
|
self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
|
||||||
|
self.gguf_writer.add_key_length(hparams["head_dim"])
|
||||||
|
self.gguf_writer.add_value_length(hparams["head_dim"])
|
||||||
|
self.gguf_writer.add_file_type(self.ftype)
|
||||||
|
|
||||||
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||||
|
del bid # unused
|
||||||
|
|
||||||
|
# lm_head is not used in llama.cpp, while autoawq will include this tensor in model
|
||||||
|
# To prevent errors, skip loading lm_head.weight.
|
||||||
|
if name == "lm_head.weight":
|
||||||
|
logger.debug(f"Skipping get tensor {name!r} in safetensors so that convert can end normally.")
|
||||||
|
return []
|
||||||
|
|
||||||
|
# ref: https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
|
||||||
|
if name.endswith("norm.weight"):
|
||||||
|
data_torch = data_torch + 1
|
||||||
|
|
||||||
|
return [(self.map_tensor_name(name), data_torch)]
|
||||||
|
|
||||||
@Model.register("Starcoder2ForCausalLM")
|
@Model.register("Starcoder2ForCausalLM")
|
||||||
class StarCoder2Model(Model):
|
class StarCoder2Model(Model):
|
||||||
|
|
|
@ -150,6 +150,7 @@ class MODEL_ARCH(IntEnum):
|
||||||
INTERNLM2 = auto()
|
INTERNLM2 = auto()
|
||||||
MINICPM = auto()
|
MINICPM = auto()
|
||||||
GEMMA = auto()
|
GEMMA = auto()
|
||||||
|
GEMMA2 = auto()
|
||||||
STARCODER2 = auto()
|
STARCODER2 = auto()
|
||||||
MAMBA = auto()
|
MAMBA = auto()
|
||||||
XVERSE = auto()
|
XVERSE = auto()
|
||||||
|
@ -180,10 +181,13 @@ class MODEL_TENSOR(IntEnum):
|
||||||
ATTN_NORM = auto()
|
ATTN_NORM = auto()
|
||||||
ATTN_NORM_2 = auto()
|
ATTN_NORM_2 = auto()
|
||||||
ATTN_OUT_NORM = auto()
|
ATTN_OUT_NORM = auto()
|
||||||
|
ATTN_POST_NORM = auto()
|
||||||
ATTN_ROT_EMBD = auto()
|
ATTN_ROT_EMBD = auto()
|
||||||
FFN_GATE_INP = auto()
|
FFN_GATE_INP = auto()
|
||||||
FFN_GATE_INP_SHEXP = auto()
|
FFN_GATE_INP_SHEXP = auto()
|
||||||
FFN_NORM = auto()
|
FFN_NORM = auto()
|
||||||
|
FFN_PRE_NORM = auto()
|
||||||
|
FFN_POST_NORM = auto()
|
||||||
FFN_GATE = auto()
|
FFN_GATE = auto()
|
||||||
FFN_DOWN = auto()
|
FFN_DOWN = auto()
|
||||||
FFN_UP = auto()
|
FFN_UP = auto()
|
||||||
|
@ -270,6 +274,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||||
MODEL_ARCH.INTERNLM2: "internlm2",
|
MODEL_ARCH.INTERNLM2: "internlm2",
|
||||||
MODEL_ARCH.MINICPM: "minicpm",
|
MODEL_ARCH.MINICPM: "minicpm",
|
||||||
MODEL_ARCH.GEMMA: "gemma",
|
MODEL_ARCH.GEMMA: "gemma",
|
||||||
|
MODEL_ARCH.GEMMA2: "gemma2",
|
||||||
MODEL_ARCH.STARCODER2: "starcoder2",
|
MODEL_ARCH.STARCODER2: "starcoder2",
|
||||||
MODEL_ARCH.MAMBA: "mamba",
|
MODEL_ARCH.MAMBA: "mamba",
|
||||||
MODEL_ARCH.XVERSE: "xverse",
|
MODEL_ARCH.XVERSE: "xverse",
|
||||||
|
@ -303,9 +308,12 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||||
MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
|
MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
|
||||||
MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
|
MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
|
||||||
MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
|
MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
|
||||||
|
MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm",
|
||||||
MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
|
MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
|
||||||
MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
|
MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
|
||||||
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
|
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
|
||||||
|
MODEL_TENSOR.FFN_PRE_NORM: "blk.{bid}.ffn_norm",
|
||||||
|
MODEL_TENSOR.FFN_POST_NORM: "blk.{bid}.post_ffw_norm",
|
||||||
MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
|
MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
|
||||||
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
|
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
|
||||||
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
||||||
|
@ -751,6 +759,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
MODEL_TENSOR.FFN_UP,
|
MODEL_TENSOR.FFN_UP,
|
||||||
MODEL_TENSOR.FFN_NORM,
|
MODEL_TENSOR.FFN_NORM,
|
||||||
],
|
],
|
||||||
|
MODEL_ARCH.GEMMA2: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
MODEL_TENSOR.ATTN_V,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.FFN_GATE,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_POST_NORM,
|
||||||
|
MODEL_TENSOR.FFN_PRE_NORM,
|
||||||
|
MODEL_TENSOR.FFN_POST_NORM,
|
||||||
|
],
|
||||||
MODEL_ARCH.STARCODER2: [
|
MODEL_ARCH.STARCODER2: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
MODEL_TENSOR.OUTPUT_NORM,
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
|
|
@ -187,6 +187,10 @@ class TensorNameMap:
|
||||||
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
|
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
|
||||||
),
|
),
|
||||||
|
|
||||||
|
MODEL_TENSOR.ATTN_POST_NORM: (
|
||||||
|
"model.layers.{bid}.post_attention_layernorm", # gemma2
|
||||||
|
),
|
||||||
|
|
||||||
# Rotary embeddings
|
# Rotary embeddings
|
||||||
MODEL_TENSOR.ATTN_ROT_EMBD: (
|
MODEL_TENSOR.ATTN_ROT_EMBD: (
|
||||||
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
|
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
|
||||||
|
@ -210,6 +214,16 @@ class TensorNameMap:
|
||||||
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
|
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
|
||||||
),
|
),
|
||||||
|
|
||||||
|
# Pre feed-forward norm
|
||||||
|
MODEL_TENSOR.FFN_PRE_NORM: (
|
||||||
|
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2
|
||||||
|
),
|
||||||
|
|
||||||
|
# Post feed-forward norm
|
||||||
|
MODEL_TENSOR.FFN_POST_NORM: (
|
||||||
|
"model.layers.{bid}.post_feedforward_layernorm", # gemma2
|
||||||
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.FFN_GATE_INP: (
|
MODEL_TENSOR.FFN_GATE_INP: (
|
||||||
"layers.{bid}.feed_forward.gate", # mixtral
|
"layers.{bid}.feed_forward.gate", # mixtral
|
||||||
"model.layers.{bid}.block_sparse_moe.gate", # mixtral
|
"model.layers.{bid}.block_sparse_moe.gate", # mixtral
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue