From c2f407e3980d5e3cb33290e8970f64919e3a851a Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Mon, 12 Feb 2024 17:35:56 -0500
Subject: [PATCH] cleanup convert-hf-to-gguf.py

---
 convert-hf-to-gguf.py | 98 +++++++++++++++++++++----------------------
 1 file changed, 47 insertions(+), 51 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index e50d2e80a..a50a6a972 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -10,7 +10,7 @@ import re
 import sys
 from enum import IntEnum
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, ContextManager, Iterator, cast
+from typing import TYPE_CHECKING, Any, ContextManager, Iterator, Sequence, cast
 
 import numpy as np
 import torch
@@ -25,15 +25,6 @@ import gguf
 
 from convert import HfVocab
 
-
-# check for any of the given keys in the dictionary and return the value of the first key found
-def get_key_opts(d, keys):
-    for k in keys:
-        if k in d:
-            return d[k]
-    print(f"Could not find any of {keys}")
-    sys.exit()
-
 ###### MODEL DEFINITIONS ######
 
 class SentencePieceTokenTypes(IntEnum):
@@ -58,6 +49,15 @@ class Model:
         self.hparams = Model.load_hparams(self.dir_model)
         self.model_arch = self._get_model_architecture()
         self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=False)
+        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
+
+    def find_hparam(self, keys: Sequence[str], optional: bool = False) -> Any:
+        key = next((k for k in keys if k in self.hparams), None)
+        if key is not None:
+            return self.hparams[key]
+        if optional:
+            return None
+        raise KeyError(f"could not find any of: {keys}")
 
     def set_vocab(self):
         self._set_vocab_gpt2()
@@ -79,28 +79,33 @@ class Model:
 
     def set_gguf_parameters(self):
         self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_block_count(self.hparams.get(
-            "n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")),
-        ))
+        self.gguf_writer.add_block_count(self.block_count)
+
         if (n_ctx := self.hparams.get("max_position_embeddings")) is not None:
             self.gguf_writer.add_context_length(n_ctx)
-        if (n_embd := self.hparams.get("hidden_size")) is not None:
-            self.gguf_writer.add_embedding_length(n_embd)
-        if (n_ff := self.hparams.get("intermediate_size")) is not None:
+
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        self.gguf_writer.add_embedding_length(n_embd)
+
+        if (n_ff := self.find_hparam(["intermediate_size", "n_inner"])) is not None:
             self.gguf_writer.add_feed_forward_length(n_ff)
-        if (n_head := self.hparams.get("num_attention_heads")) is not None:
-            self.gguf_writer.add_head_count(n_head)
+
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])
+        self.gguf_writer.add_head_count(n_head)
+
         if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None:
             self.gguf_writer.add_head_count_kv(n_head_kv)
 
-        if (n_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
-            self.gguf_writer.add_layer_norm_rms_eps(n_rms_eps)
+        if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
+            self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
+        if (f_norm_eps := self.hparams.get("layer_norm_eps")) is not None:
+            self.gguf_writer.add_layer_norm_eps(f_norm_eps)
         if (n_experts := self.hparams.get("num_local_experts")) is not None:
             self.gguf_writer.add_expert_count(n_experts)
         if (n_experts_used := self.hparams.get("num_experts_per_tok")) is not None:
             self.gguf_writer.add_expert_used_count(n_experts_used)
 
-        self.gguf_writer.add_parallel_residual(self.hparams.get("use_parallel_residual", True))
+        self.gguf_writer.add_file_type(self.ftype)
 
     def write_tensors(self):
         block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
@@ -1301,21 +1306,21 @@ class GPT2Model(Model):
 
 class Phi2Model(Model):
     def set_gguf_parameters(self):
-        block_count = get_key_opts(self.hparams, ["num_hidden_layers", "n_layer"])
+        block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
 
-        rot_pct = get_key_opts(self.hparams, ["partial_rotary_factor"])
-        n_embd = get_key_opts(self.hparams, ["hidden_size", "n_embd"])
-        n_head = get_key_opts(self.hparams, ["num_attention_heads", "n_head"])
+        rot_pct = self.find_hparam(["partial_rotary_factor"])
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])
 
         self.gguf_writer.add_name("Phi2")
-        self.gguf_writer.add_context_length(get_key_opts(self.hparams, ["n_positions", "max_position_embeddings"]))
+        self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))
 
         self.gguf_writer.add_embedding_length(n_embd)
         self.gguf_writer.add_feed_forward_length(4 * n_embd)
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_head_count(n_head)
         self.gguf_writer.add_head_count_kv(n_head)
-        self.gguf_writer.add_layer_norm_eps(get_key_opts(self.hparams, ["layer_norm_epsilon", "layer_norm_eps"]))
+        self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_epsilon", "layer_norm_eps"]))
         self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
         self.gguf_writer.add_file_type(self.ftype)
         self.gguf_writer.add_add_bos_token(False)
@@ -1640,21 +1645,12 @@ in chat mode so that the conversation can end normally.")
 class BertModel(Model):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.block_count = self.hparams["num_hidden_layers"]
         self.vocab_size = None
 
     def set_gguf_parameters(self):
-        # TODO(cebtenzzre): merge with parent class
-        self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
-        self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
-        self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
-        self.gguf_writer.add_block_count(self.block_count)
-        self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
-        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
+        super().set_gguf_parameters()
         self.gguf_writer.add_causal_attention(False)
         self.gguf_writer.add_pooling_layer(True)
-        self.gguf_writer.add_file_type(self.ftype)
 
     def set_vocab(self):
         path = self.dir_model
@@ -1678,7 +1674,7 @@ class BertModel(Model):
             if tok.startswith(b"##"):
                 return tok[2:]
             return b"\xe2\x96\x81" + tok
-        tokens = [phantom(t, y) for t, y in zip(tokens, toktypes)]
+        tokens = tuple(phantom(t, y) for t, y in zip(tokens, toktypes))
 
         # set up bos and eos tokens (cls and sep)
         self.gguf_writer.add_bos_token_id(vocab.tokenizer.cls_token_id)
@@ -1732,34 +1728,34 @@ class BertModel(Model):
 
 class NomicBertModel(BertModel):
     def __init__(self, *args, **kwargs):
-        Model.__init__(self, *args, **kwargs)
-        self.block_count = self.hparams["n_layer"]
+        super().__init__(*args, **kwargs)
+
+        # the HF config claims n_ctx=8192, but it uses RoPE scaling
+        self.hparams["n_ctx"] = 2048
+
+        # SwigLU activation
         assert self.hparams["activation_function"] == "swiglu"
-        assert self.hparams["causal"] is False  # True is untested
+        # this doesn't do anything in the HF version
+        assert self.hparams["causal"] is False
+        # no bias tensors
         assert self.hparams["qkv_proj_bias"] is False
         assert self.hparams["mlp_fc1_bias"] is False
         assert self.hparams["mlp_fc2_bias"] is False
+        # norm at end of layer
         assert self.hparams["prenorm"] is False
+        # standard RoPE
         assert self.hparams["rotary_emb_fraction"] == 1.0
         assert self.hparams["rotary_emb_interleaved"] is False
         assert self.hparams["rotary_emb_scale_base"] is None
 
     def set_gguf_parameters(self):
-        # TODO(cebtenzzre): merge with parent class
-        self.gguf_writer.add_name(self.dir_model.name)
-        # the HF config claims n_ctx=8192, but it uses RoPE scaling
-        self.gguf_writer.add_context_length(2048)
-        self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
-        self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
-        self.gguf_writer.add_block_count(self.block_count)
-        self.gguf_writer.add_head_count(self.hparams["n_head"])
-        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+        super().set_gguf_parameters()
         self.gguf_writer.add_causal_attention(self.hparams["causal"])
         self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
         self.gguf_writer.add_pooling_layer(True)
-        self.gguf_writer.add_file_type(self.ftype)
 
     def get_tensors(self):
+        assert self.vocab_size is not None
         for name, data in super().get_tensors():
             # Nomic Embed's token embeddings tensor is padded, but llama.cpp wants tensor sizes to match exactly.
             if name == 'embeddings.word_embeddings.weight' and data.shape[1] != self.vocab_size:
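
The core refactor in this patch replaces the module-level get_key_opts() helper, which printed an error and called sys.exit() on a missing key, with a Model.find_hparam() method that returns the first matching hyperparameter, returns None when optional=True, and raises KeyError otherwise. For illustration only, a minimal standalone sketch of that lookup pattern; the hparams values below are hypothetical and not taken from any real model config:

    from typing import Any, Sequence

    def find_hparam(hparams: dict, keys: Sequence[str], optional: bool = False) -> Any:
        # return the value of the first candidate key present in hparams
        key = next((k for k in keys if k in hparams), None)
        if key is not None:
            return hparams[key]
        if optional:
            return None
        raise KeyError(f"could not find any of: {keys}")

    # hypothetical config values, for illustration only
    hparams = {"n_layer": 12, "n_embd": 768}
    print(find_hparam(hparams, ["n_layers", "num_hidden_layers", "n_layer"]))    # 12
    print(find_hparam(hparams, ["intermediate_size", "n_inner"], optional=True)) # None

Compared with get_key_opts(), a raised KeyError surfaces the missing key to the caller instead of terminating the whole conversion, and optional=True covers hyperparameters that may legitimately be absent from a given model's config.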