cleanup convert-hf-to-gguf.py

parent b8ff85efe0
commit c2f407e398

1 changed file with 47 additions and 51 deletions

convert-hf-to-gguf.py
@@ -10,7 +10,7 @@ import re
 import sys
 from enum import IntEnum
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, ContextManager, Iterator, cast
+from typing import TYPE_CHECKING, Any, ContextManager, Iterator, Sequence, cast
 
 import numpy as np
 import torch
@@ -25,15 +25,6 @@ import gguf
 from convert import HfVocab
 
 
-# check for any of the given keys in the dictionary and return the value of the first key found
-def get_key_opts(d, keys):
-    for k in keys:
-        if k in d:
-            return d[k]
-    print(f"Could not find any of {keys}")
-    sys.exit()
-
-
 ###### MODEL DEFINITIONS ######
 
 class SentencePieceTokenTypes(IntEnum):
@@ -58,6 +49,15 @@ class Model:
         self.hparams = Model.load_hparams(self.dir_model)
         self.model_arch = self._get_model_architecture()
         self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=False)
+        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
+
+    def find_hparam(self, keys: Sequence[str], optional: bool = False) -> Any:
+        key = next((k for k in keys if k in self.hparams), None)
+        if key is not None:
+            return self.hparams[key]
+        if optional:
+            return None
+        raise KeyError(f"could not find any of: {keys}")
 
     def set_vocab(self):
         self._set_vocab_gpt2()
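Note: the new Model.find_hparam helper returns the value of the first listed key present in self.hparams, returns None when optional=True, and raises KeyError otherwise, replacing the module-level get_key_opts, which printed a message and called sys.exit(). A minimal standalone sketch of the same lookup behavior (the free function below is illustrative, not part of the diff):

    from typing import Any, Sequence

    def find_hparam(hparams: dict, keys: Sequence[str], optional: bool = False) -> Any:
        # the first listed key that exists in the config wins
        key = next((k for k in keys if k in hparams), None)
        if key is not None:
            return hparams[key]
        if optional:
            return None
        raise KeyError(f"could not find any of: {keys}")

    # HF-style configs use "hidden_size", GPT-2-style configs use "n_embd"
    assert find_hparam({"n_embd": 768}, ["hidden_size", "n_embd"]) == 768
    assert find_hparam({"n_embd": 768}, ["rms_norm_eps"], optional=True) is None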
@@ -79,28 +79,33 @@ class Model:
 
     def set_gguf_parameters(self):
         self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_block_count(self.hparams.get(
-            "n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")),
-        ))
+        self.gguf_writer.add_block_count(self.block_count)
+
         if (n_ctx := self.hparams.get("max_position_embeddings")) is not None:
             self.gguf_writer.add_context_length(n_ctx)
-        if (n_embd := self.hparams.get("hidden_size")) is not None:
-            self.gguf_writer.add_embedding_length(n_embd)
-        if (n_ff := self.hparams.get("intermediate_size")) is not None:
+
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        self.gguf_writer.add_embedding_length(n_embd)
+
+        if (n_ff := self.find_hparam(["intermediate_size", "n_inner"])) is not None:
             self.gguf_writer.add_feed_forward_length(n_ff)
-        if (n_head := self.hparams.get("num_attention_heads")) is not None:
-            self.gguf_writer.add_head_count(n_head)
+
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])
+        self.gguf_writer.add_head_count(n_head)
+
         if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None:
             self.gguf_writer.add_head_count_kv(n_head_kv)
 
-        if (n_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
-            self.gguf_writer.add_layer_norm_rms_eps(n_rms_eps)
+        if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
+            self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
+        if (f_norm_eps := self.hparams.get("layer_norm_eps")) is not None:
+            self.gguf_writer.add_layer_norm_eps(f_norm_eps)
         if (n_experts := self.hparams.get("num_local_experts")) is not None:
             self.gguf_writer.add_expert_count(n_experts)
         if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
             self.gguf_writer.add_expert_used_count(n_experts_used)
 
-        self.gguf_writer.add_parallel_residual(self.hparams.get("use_parallel_residual", True))
+        self.gguf_writer.add_file_type(self.ftype)
 
     def write_tensors(self):
         block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
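Note: with the hunk above, the base Model.set_gguf_parameters takes the block count from self.block_count and resolves embedding size, feed-forward size, and head count through find_hparam, so configs that use GPT-2-style key names ("n_embd", "n_inner", "n_head") are covered without a per-model override. A rough sketch of that alias resolution; the alias lists are copied from this hunk, while the resolve helper itself is hypothetical and not part of the diff:

    HPARAM_ALIASES = {
        "block_count": ["n_layers", "num_hidden_layers", "n_layer"],
        "embedding_length": ["hidden_size", "n_embd"],
        "feed_forward_length": ["intermediate_size", "n_inner"],
        "head_count": ["num_attention_heads", "n_head"],
    }

    def resolve(hparams: dict) -> dict:
        # first matching alias wins, mirroring find_hparam
        return {field: next((hparams[k] for k in keys if k in hparams), None)
                for field, keys in HPARAM_ALIASES.items()}

    print(resolve({"n_layer": 12, "n_embd": 768, "n_inner": 3072, "n_head": 12}))
    # {'block_count': 12, 'embedding_length': 768, 'feed_forward_length': 3072, 'head_count': 12}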
@@ -1301,21 +1306,21 @@ class GPT2Model(Model):
 
 class Phi2Model(Model):
     def set_gguf_parameters(self):
-        block_count = get_key_opts(self.hparams, ["num_hidden_layers", "n_layer"])
+        block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
 
-        rot_pct = get_key_opts(self.hparams, ["partial_rotary_factor"])
-        n_embd = get_key_opts(self.hparams, ["hidden_size", "n_embd"])
-        n_head = get_key_opts(self.hparams, ["num_attention_heads", "n_head"])
+        rot_pct = self.find_hparam(["partial_rotary_factor"])
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])
 
         self.gguf_writer.add_name("Phi2")
-        self.gguf_writer.add_context_length(get_key_opts(self.hparams, ["n_positions", "max_position_embeddings"]))
+        self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))
 
         self.gguf_writer.add_embedding_length(n_embd)
         self.gguf_writer.add_feed_forward_length(4 * n_embd)
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_head_count(n_head)
         self.gguf_writer.add_head_count_kv(n_head)
-        self.gguf_writer.add_layer_norm_eps(get_key_opts(self.hparams, ["layer_norm_epsilon", "layer_norm_eps"]))
+        self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_epsilon", "layer_norm_eps"]))
         self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
         self.gguf_writer.add_file_type(self.ftype)
         self.gguf_writer.add_add_bos_token(False)
@@ -1640,21 +1645,12 @@ in chat mode so that the conversation can end normally.")
 class BertModel(Model):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.block_count = self.hparams["num_hidden_layers"]
         self.vocab_size = None
 
     def set_gguf_parameters(self):
-        # TODO(cebtenzzre): merge with parent class
-        self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
-        self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
-        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
-        self.gguf_writer.add_block_count(self.block_count)
-        self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
-        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
+        super().set_gguf_parameters()
         self.gguf_writer.add_causal_attention(False)
         self.gguf_writer.add_pooling_layer(True)
-        self.gguf_writer.add_file_type(self.ftype)
 
     def set_vocab(self):
         path = self.dir_model
@@ -1678,7 +1674,7 @@ class BertModel(Model):
             if tok.startswith(b"##"):
                 return tok[2:]
             return b"\xe2\x96\x81" + tok
-        tokens = [phantom(t, y) for t, y in zip(tokens, toktypes)]
+        tokens = tuple(phantom(t, y) for t, y in zip(tokens, toktypes))
 
         # set up bos and eos tokens (cls and sep)
         self.gguf_writer.add_bos_token_id(vocab.tokenizer.cls_token_id)
@@ -1732,34 +1728,34 @@ class BertModel(Model):
 
 class NomicBertModel(BertModel):
     def __init__(self, *args, **kwargs):
-        Model.__init__(self, *args, **kwargs)
-        self.block_count = self.hparams["n_layer"]
+        super().__init__(*args, **kwargs)
+
+        # the HF config claims n_ctx=8192, but it uses RoPE scaling
+        self.hparams["n_ctx"] = 2048
+
+        # SwigLU activation
         assert self.hparams["activation_function"] == "swiglu"
-        assert self.hparams["causal"] is False  # True is untested
+        # this doesn't do anything in the HF version
+        assert self.hparams["causal"] is False
+        # no bias tensors
         assert self.hparams["qkv_proj_bias"] is False
         assert self.hparams["mlp_fc1_bias"] is False
         assert self.hparams["mlp_fc2_bias"] is False
+        # norm at end of layer
         assert self.hparams["prenorm"] is False
+        # standard RoPE
         assert self.hparams["rotary_emb_fraction"] == 1.0
         assert self.hparams["rotary_emb_interleaved"] is False
         assert self.hparams["rotary_emb_scale_base"] is None
 
     def set_gguf_parameters(self):
-        # TODO(cebtenzzre): merge with parent class
-        self.gguf_writer.add_name(self.dir_model.name)
-        # the HF config claims n_ctx=8192, but it uses RoPE scaling
-        self.gguf_writer.add_context_length(2048)
-        self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
-        self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
-        self.gguf_writer.add_block_count(self.block_count)
-        self.gguf_writer.add_head_count(self.hparams["n_head"])
-        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+        super().set_gguf_parameters()
         self.gguf_writer.add_causal_attention(self.hparams["causal"])
         self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
         self.gguf_writer.add_pooling_layer(True)
-        self.gguf_writer.add_file_type(self.ftype)
 
     def get_tensors(self):
+        assert self.vocab_size is not None
         for name, data in super().get_tensors():
             # Nomic Embed's token embeddings tensor is padded, but llama.cpp wants tensor sizes to match exactly.
             if name == 'embeddings.word_embeddings.weight' and data.shape[1] != self.vocab_size:
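Note: after this commit, BertModel and NomicBertModel no longer duplicate the common writer calls; each set_gguf_parameters starts with super().set_gguf_parameters() and appends only model-specific fields, and NomicBertModel adjusts self.hparams ("n_ctx" = 2048) in __init__ instead of hard-coding the context length in set_gguf_parameters. A self-contained sketch of that override pattern, using stand-in classes rather than the real Model/GGUFWriter API:

    class BaseConverter:
        def set_gguf_parameters(self) -> list[str]:
            # common fields written once by the base class
            return ["block_count", "embedding_length", "head_count", "file_type"]

    class EncoderConverter(BaseConverter):
        def set_gguf_parameters(self) -> list[str]:
            # subclasses add only what differs from the defaults
            return super().set_gguf_parameters() + ["causal_attention=False", "pooling_layer=True"]

    print(EncoderConverter().set_gguf_parameters())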