commit 42c90d21ca
parent 172c825684
Author: wheelspawn
Date:   2024-06-13 14:37:23 -05:00

convert-hf-to-gguf.py

@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-

 from __future__ import annotations

@ -47,12 +46,11 @@ class Model:
_model_classes: dict[str, type[Model]] = {} _model_classes: dict[str, type[Model]] = {}
dir_model: Path dir_model: Path
ftype: gguf.LlamaFileType ftype: int
is_big_endian: bool is_big_endian: bool
endianess: gguf.GGUFEndian endianess: gguf.GGUFEndian
use_temp_file: bool use_temp_file: bool
lazy: bool lazy: bool
model_name: str | None
part_names: list[str] part_names: list[str]
is_safetensors: bool is_safetensors: bool
hparams: dict[str, Any] hparams: dict[str, Any]
@@ -65,7 +63,7 @@ class Model:
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH

-    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, model_name: str | None):
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
         self.dir_model = dir_model
@@ -74,11 +72,10 @@ class Model:
         self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.use_temp_file = use_temp_file
         self.lazy = not eager
-        self.model_name = model_name
-        self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors")
+        self.part_names = Model.get_model_part_names(self.dir_model, ".safetensors")
         self.is_safetensors = len(self.part_names) > 0
         if not self.is_safetensors:
-            self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
+            self.part_names = Model.get_model_part_names(self.dir_model, ".bin")
         self.hparams = Model.load_hparams(self.dir_model)
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
@@ -96,7 +93,7 @@ class Model:
         ftype_lw: str = ftype_up.lower()
         # allow templating the file name with the output ftype, useful with the "auto" ftype
         self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
-        self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
+        self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)

     @classmethod
     def __init_subclass__(cls):
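
Review note: the old constructor created the writer with `path=None` and supplied the path later via `write_header_to_file(self.fname_out)`; the new call binds `self.fname_out` up front, which is why the `write_header_to_file()` calls further down this diff lose their argument. A minimal sketch of the resulting call sequence, using only the calls visible in this diff:

```python
# Sketch only, not the full conversion flow; all names are taken from the diff above.
writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[model_arch],
                         endianess=endianess, use_temp_file=use_temp_file)
writer.write_header_to_file()    # output path was already bound at construction
writer.write_kv_data_to_file()
writer.write_tensors_to_file(progress=True)
writer.close()
```
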
@@ -140,7 +137,7 @@ class Model:
                 from safetensors import safe_open
                 ctx = cast(ContextManager[Any], safe_open(self.dir_model / part_name, framework="pt", device="cpu"))
             else:
-                ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True))
+                ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=False, weights_only=True))

             with ctx as model_part:
                 tensor_names_from_parts.update(model_part.keys())
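
Review note: flipping `mmap=True` to `mmap=False` makes `torch.load` read each `.bin` shard fully into RAM instead of memory-mapping it from disk (the `mmap` keyword exists in PyTorch 2.1+). A minimal illustration, using a hypothetical checkpoint path:

```python
import torch

# Memory-mapped: tensor storages are paged in from disk on demand,
# keeping peak memory low for large multi-part checkpoints.
state = torch.load("pytorch_model.bin", map_location="cpu",
                   mmap=True, weights_only=True)

# Eager (what this commit switches to): the whole file is materialized in RAM.
state = torch.load("pytorch_model.bin", map_location="cpu",
                   mmap=False, weights_only=True)
```
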
@@ -177,14 +174,14 @@ class Model:
                 return False
         return name == (key_name + suffix)

-    def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str:
+    def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias", ".beta", ".gamma")) -> str:
         new_name = self.tensor_map.get_name(key=name, try_suffixes=try_suffixes)
         if new_name is None:
             raise ValueError(f"Can not map tensor {name!r}")
         return new_name

     def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_block_count(self.block_count)

         if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
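
Review note: `.beta` and `.gamma` are the legacy names old BERT checkpoints use for LayerNorm bias and weight, so adding them to `try_suffixes` lets such tensors reach the name map instead of raising. A toy sketch of the suffix-stripping lookup (`MAPPING` here is an illustrative stand-in for gguf's real tensor-name table):

```python
MAPPING = {"embeddings.LayerNorm": "token_embd_norm"}  # illustrative entry

def map_name(name: str, try_suffixes=(".weight", ".bias", ".beta", ".gamma")) -> str:
    for suffix in try_suffixes:
        if name.endswith(suffix):
            base = MAPPING.get(name[: -len(suffix)])
            if base is not None:
                return base + suffix
    raise ValueError(f"Can not map tensor {name!r}")

# "embeddings.LayerNorm.gamma" now resolves instead of raising.
print(map_name("embeddings.LayerNorm.gamma"))
```
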
@@ -249,6 +246,9 @@ class Model:
             if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
                 continue

+            if name.startswith("bert."):
+                name = name.removeprefix("bert.")
+
             old_dtype = data_torch.dtype

             # convert any unsupported data types to float32
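
Review note: checkpoints saved from wrapper classes (e.g. `BertForMaskedLM`) prefix every backbone tensor with `bert.`; stripping it normalizes names before mapping. `str.removeprefix` (Python 3.9+) already no-ops when the prefix is absent, so the `startswith` guard is belt-and-braces:

```python
names = ["bert.embeddings.word_embeddings.weight",  # hypothetical examples
         "pooler.dense.weight"]
print([n.removeprefix("bert.") for n in names])
# ['embeddings.word_embeddings.weight', 'pooler.dense.weight']
```
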
@@ -326,21 +326,21 @@ class Model:
     def write(self):
         self.write_tensors()
-        self.gguf_writer.write_header_to_file(self.fname_out)
+        self.gguf_writer.write_header_to_file()
         self.gguf_writer.write_kv_data_to_file()
         self.gguf_writer.write_tensors_to_file(progress=True)
         self.gguf_writer.close()

     def write_vocab(self):
-        self.gguf_writer.write_header_to_file(self.fname_out)
+        self.gguf_writer.write_header_to_file()
         self.gguf_writer.write_kv_data_to_file()
         self.gguf_writer.close()

     @staticmethod
-    def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]:
+    def get_model_part_names(dir_model: Path, suffix: str) -> list[str]:
         part_names: list[str] = []
         for filename in os.listdir(dir_model):
-            if filename.startswith(prefix) and filename.endswith(suffix):
+            if filename.endswith(suffix):
                 part_names.append(filename)

         part_names.sort()

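
Review note: dropping the `prefix` argument means every file with the matching extension now counts as a model shard, not just `model*.safetensors` / `pytorch_model*.bin`; stray artifacts such as `training_args.bin` (commonly written by the HF `Trainer`) would be swept up too. A standalone sketch of the new behavior:

```python
import os
from pathlib import Path

def get_model_part_names(dir_model: Path, suffix: str) -> list[str]:
    # Suffix-only match: includes any "*.bin", e.g. "training_args.bin" if present.
    part_names = [f for f in os.listdir(dir_model) if f.endswith(suffix)]
    part_names.sort()
    return part_names
```
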
@ -423,9 +423,6 @@ class Model:
# NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
# or pull the latest version of the model from Huggingface # or pull the latest version of the model from Huggingface
# don't edit the hashes manually! # don't edit the hashes manually!
if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
# ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
res = "llama-bpe"
if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754": if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754":
# ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base
res = "deepseek-llm" res = "deepseek-llm"
@@ -435,6 +432,9 @@ class Model:
         if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
             # ref: https://huggingface.co/tiiuae/falcon-7b
             res = "falcon"
+        if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
+            # ref: https://huggingface.co/google-bert/bert-base-uncased
+            res = "bert"
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
             res = "bert-bge"
@@ -453,18 +453,12 @@ class Model:
         if chkhsh == "6221ad2852e85ce96f791f476e0b390cf9b474c9e3d1362f53a24a06dc8220ff":
             # ref: https://huggingface.co/smallcloudai/Refact-1_6-base
             res = "refact"
-        if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8":
-            # ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
-            res = "command-r"
         if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea":
             # ref: https://huggingface.co/Qwen/Qwen1.5-7B
             res = "qwen2"
         if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
             # ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
             res = "olmo"
-        if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
-            # ref: https://huggingface.co/databricks/dbrx-base
-            res = "dbrx"
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en
             res = "jina-v2-en"
@@ -477,9 +471,6 @@ class Model:
         if chkhsh == "c136ed14d01c2745d4f60a9596ae66800e2b61fa45643e72436041855ad4089d":
             # ref: https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct
             res = "smaug-bpe"
-        if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
-            # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
-            res = "jina-v2-code"

         if res is None:
             logger.warning("\n")
@@ -667,7 +658,7 @@ class GPTNeoXModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["num_hidden_layers"]

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -800,7 +791,7 @@ class MPTModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layers"]

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
         self.gguf_writer.add_embedding_length(self.hparams["d_model"])
         self.gguf_writer.add_block_count(block_count)
@@ -852,7 +843,7 @@ class OrionModel(Model):
             raise ValueError("gguf: can not find ctx length parameter.")

         self.gguf_writer.add_file_type(self.ftype)
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -889,7 +880,7 @@ class BaichuanModel(Model):
         else:
             raise ValueError("gguf: can not find ctx length parameter.")

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -1012,7 +1003,7 @@ class XverseModel(Model):
         else:
             raise ValueError("gguf: can not find ctx length parameter.")

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -1208,7 +1199,7 @@ class StableLMModel(Model):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -1683,7 +1674,7 @@ class GPT2Model(Model):
     model_arch = gguf.MODEL_ARCH.GPT2

     def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_block_count(self.hparams["n_layer"])
         self.gguf_writer.add_context_length(self.hparams["n_ctx"])
         self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
@@ -2193,7 +2184,7 @@ class BertModel(Model):
         del bid  # unused

         # we are only using BERT for embeddings so we don't need the pooling layer
-        if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias"):
+        if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias") or "cls." in name:
             return []  # we don't need these

         return [(self.map_tensor_name(name), data_torch)]
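
Review note: the new `"cls." in name` clause drops the pre-training heads that stock BERT checkpoints carry (`cls.predictions.*` for masked-LM, `cls.seq_relationship.*` for next-sentence prediction), which an embedding-only conversion never uses. A small sketch of the filter with hypothetical tensor names; note the substring match would catch `cls.` anywhere in a name, not just as a prefix:

```python
SKIP = ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias")

def keep(name: str) -> bool:
    # Drop the pooler and any pre-training head tensors.
    return name not in SKIP and "cls." not in name

assert not keep("cls.predictions.transform.dense.weight")   # MLM head: dropped
assert keep("encoder.layer.0.attention.self.query.weight")  # kept
```
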
@@ -2250,7 +2241,7 @@ class GemmaModel(Model):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -2350,7 +2341,7 @@ class MambaModel(Model):
         # Fail early for models which don't have a block expansion factor of 2
         assert d_inner == 2 * d_model

-        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(2**20)  # arbitrary value; for those who use the default
         self.gguf_writer.add_embedding_length(d_model)
         self.gguf_writer.add_feed_forward_length(0)  # unused, but seemingly required when loading
@@ -2457,13 +2448,11 @@ class JinaBertV2Model(BertModel):

     def get_tensors(self):
         for name, data in super().get_tensors():
-            if 'gated_layer' in name:
+            if 'gated_layers' in name:
                 d1 = data[:self.intermediate_size, :]
                 name1 = name.replace('gated_layers', 'gated_layers_w')
-                name1 = name1.replace('up_gated_layer', 'gated_layers_v')
                 d2 = data[self.intermediate_size:, :]
                 name2 = name.replace('gated_layers', 'gated_layers_v')
-                name2 = name2.replace('up_gated_layer', 'gated_layers_w')
                 yield name1, d1
                 yield name2, d2
                 continue
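
Review note: Jina-BERT's GLU feed-forward stores its two projections fused in one `gated_layers` tensor with `2 * intermediate_size` rows; the loop splits it row-wise, sending the first half to `gated_layers_w` and the second to `gated_layers_v`. A standalone sketch with a toy tensor (shapes and the tensor name are illustrative):

```python
import torch

intermediate_size, hidden = 4, 3
# Toy stand-in for a fused weight of shape (2 * intermediate_size, hidden).
data = torch.arange(2 * intermediate_size * hidden, dtype=torch.float32)
data = data.reshape(2 * intermediate_size, hidden)

name = "encoder.layers.0.mlp.gated_layers.weight"  # hypothetical name
d1, d2 = data[:intermediate_size, :], data[intermediate_size:, :]
name1 = name.replace("gated_layers", "gated_layers_w")  # first half
name2 = name.replace("gated_layers", "gated_layers_v")  # second half
```
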
@@ -2848,13 +2837,8 @@ def main() -> None:
     hparams = Model.load_hparams(dir_model)

     with torch.inference_mode():
-        try:
-            model_class = Model.from_model_architecture(hparams["architectures"][0])
-        except NotImplementedError:
-            logger.error(f"Model {hparams['architectures'][0]} is not supported")
-            sys.exit(1)
-
-        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy, args.model_name)
+        model_class = Model.from_model_architecture(hparams["architectures"][0])
+        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy)

         logger.info("Set model parameters")
         model_instance.set_gguf_parameters()
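
Review note: with the try/except removed, an unsupported `architectures[0]` now escapes as a raw `NotImplementedError` traceback instead of the clean log-and-exit the old code gave. If that friendlier failure mode is still wanted, the removed pattern was simply:

```python
try:
    model_class = Model.from_model_architecture(hparams["architectures"][0])
except NotImplementedError:
    logger.error(f"Model {hparams['architectures'][0]} is not supported")
    sys.exit(1)
```
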
@@ -2876,3 +2860,4 @@ def main() -> None:

 if __name__ == '__main__':
     main()