From c1c0f4d883d9b234c49e26e30d02d928f1bd103f Mon Sep 17 00:00:00 2001
From: Joan Martinez
Date: Mon, 22 Apr 2024 13:45:32 +0200
Subject: [PATCH] fix: clean up convert-hf-to-gguf.py formatting

Rejoin artificially wrapped calls onto single lines and drop stray blank
lines so the file matches its prevailing style, fix a couple of small
typos, and move the JinaBertModel class next to the most recently added
model classes.
---
 convert-hf-to-gguf.py | 94 +++++++++++++++++++++--------------------------
 1 file changed, 42 insertions(+), 52 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index f4a758aaa..c1b6888bc 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -77,13 +77,11 @@ class Model(ABC):
         for part_name in self.part_names:
             print(f"gguf: loading model part '{part_name}'")
             ctx: ContextManager[Any]
-
             if self.is_safetensors:
                 from safetensors import safe_open
                 ctx = cast(ContextManager[Any], safe_open(self.dir_model / part_name, framework="pt", device="cpu"))
             else:
-                ctx = contextlib.nullcontext(
-                    torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True))
+                ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True))
 
             with ctx as model_part:
                 for name in model_part.keys():
@@ -120,8 +118,7 @@ class Model(ABC):
         if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
             self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
             print(f"gguf: rms norm epsilon = {f_rms_eps}")
-        if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"],
-                                           optional=True)) is not None:
+        if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None:
             self.gguf_writer.add_layer_norm_eps(f_norm_eps)
             print(f"gguf: layer norm epsilon = {f_norm_eps}")
         if (n_experts := self.hparams.get("num_local_experts")) is not None:
@@ -209,7 +206,6 @@ class Model(ABC):
             for name in names:
                 cls._model_classes[name] = modelcls
             return modelcls
-
         return func
 
     @classmethod
@@ -294,7 +290,7 @@ class Model(ABC):
 
         # for this kind of tokenizer, added_vocab is not a subset of vocab, so they need to be combined
         added_vocab = tokenizer.special_tokens
-        reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in (vocab | added_vocab).items()}
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in (vocab | added_vocab).items()}
 
         for i in range(vocab_size):
             if i not in reverse_vocab:
@@ -779,8 +775,8 @@ class BaichuanModel(Model):
 
         return (
             weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape)
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
         )
 
     def _reverse_hf_permute_part(
@@ -931,8 +927,8 @@ class XverseModel(Model):
 
         return (
             weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape)
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
         )
 
 
@@ -1209,8 +1205,7 @@ class StableLMModel(Model):
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
         rotary_factor = self.find_hparam(["partial_rotary_factor", "rope_pct"])
-        self.gguf_writer.add_rope_dimension_count(
-            int(rotary_factor * (hparams["hidden_size"] // hparams["num_attention_heads"])))
+        self.gguf_writer.add_rope_dimension_count(int(rotary_factor * (hparams["hidden_size"] // hparams["num_attention_heads"])))
         self.gguf_writer.add_head_count(hparams["num_attention_heads"])
         self.gguf_writer.add_head_count_kv(hparams["num_key_value_heads"])
         self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
@@ -1304,7 +1299,7 @@ class LlamaModel(Model):
 
     def set_vocab(self):
         try:
-            self. _set_vocab_sentencepiece()
+            self._set_vocab_sentencepiece()
         except FileNotFoundError:
             try:
                 self._set_vocab_llama_hf()
@@ -1653,8 +1648,8 @@ class MiniCPMModel(Model):
 
         return (
             weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape)
+            .swapaxes(1, 2)
+            .reshape(weights.shape)
         )
 
     def write_tensors(self):
@@ -1914,8 +1909,7 @@ class GPT2Model(Model):
 
         for name, data_torch in self.get_tensors():
             # we don't need these
-            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq",
-                              ".attn.bias", ".attn.masked_bias")):
+            if name.endswith((".attention.masked_bias", ".attention.bias", ".attention.rotary_emb.inv_freq", ".attn.bias", ".attn.masked_bias")):
                 continue
 
             if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_proj.weight")):
@@ -2300,8 +2294,7 @@ in chat mode so that the conversation can end normally.")
             bid = re.findall(qkv_pattern, name)[0]
             qkv = data_torch
             qkv = rearrange(qkv.T, " o (g n i) ->o g n i", g=num_groups, n=q_per_kv + 2, i=head_dim)
-            q, k, v = qkv[..., : q_per_kv, :], qkv[..., q_per_kv: q_per_kv + 1, :], qkv[...,
-                                                                                        q_per_kv + 1: q_per_kv + 2, :]
+            q, k, v = qkv[..., : q_per_kv, :], qkv[..., q_per_kv: q_per_kv + 1, :], qkv[..., q_per_kv + 1: q_per_kv + 2, :]
             # The model weights of q and k require additional reshape.
             q = self._hf_permute_qk(rearrange(q, " o g n i -> o (g n i)").T, num_heads, num_heads)
             k = self._hf_permute_qk(rearrange(k, " o g n i -> o (g n i)").T, num_heads, num_kv_heads)
@@ -2384,7 +2377,6 @@ class BertModel(Model):
 
             # map tensor names
             new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
-
            if new_name is None:
                 print(f"Can not map tensor {name!r}")
                 sys.exit()
@@ -2441,31 +2433,6 @@ class NomicBertModel(BertModel):
         self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
 
 
-@Model.register("JinaBertModel")
-class JinaBertModel(BertModel):
-    model_arch = gguf.MODEL_ARCH.JINA_BERT
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.intermediate_size = self.hparams["intermediate_size"]
-
-    def get_tensors(self):
-        for name, data in super().get_tensors():
-            if 'gated_layers' in name:
-                d1 = data[:self.intermediate_size, :]
-                name1 = name.replace('gated_layers', 'gated_layers_w')
-                d2 = data[self.intermediate_size:, :]
-                name2 = name.replace('gated_layers', 'gated_layers_v')
-                yield name1, d1
-                yield name2, d2
-                continue
-
-            yield name, data
-
-
-JinaBertForMaskedML = JinaBertModel
-
-
 @Model.register("GemmaForCausalLM")
 class GemmaModel(Model):
     model_arch = gguf.MODEL_ARCH.GEMMA
@@ -2493,8 +2460,7 @@ class GemmaModel(Model):
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
         self.gguf_writer.add_head_count(hparams["num_attention_heads"])
-        self.gguf_writer.add_head_count_kv(
-            self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
+        self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_key_length(hparams["head_dim"])
         self.gguf_writer.add_value_length(hparams["head_dim"])
@@ -2604,10 +2570,10 @@ class MambaModel(Model):
         assert d_inner == 2 * d_model
 
         self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_context_length(2 ** 20)  # arbitrary value; for those who use the default
+        self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default
         self.gguf_writer.add_embedding_length(d_model)
-        self.gguf_writer.add_feed_forward_length(0)   # unused, but seemingly required when loading
-        self.gguf_writer.add_head_count(0)            # unused, but seemingly required when loading
+        self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
+        self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
         self.gguf_writer.add_block_count(self.hparams["n_layer"])
         self.gguf_writer.add_ssm_conv_kernel(d_conv)
         self.gguf_writer.add_ssm_inner_size(d_inner)
@@ -2622,7 +2588,7 @@ class MambaModel(Model):
 
         tok_embd = None
         tok_embd_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.TOKEN_EMBD] + ".weight"
-        output_name   = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.OUTPUT]     + ".weight"
+        output_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.OUTPUT] + ".weight"
 
         for name, data_torch in self.get_tensors():
             old_dtype = data_torch.dtype
@@ -2748,6 +2714,29 @@ class OlmoModel(Model):
             self.gguf_writer.add_tensor(new_name, data)
 
 
+@Model.register("JinaBertModel")
+class JinaBertModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.JINA_BERT
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.intermediate_size = self.hparams["intermediate_size"]
+
+    def get_tensors(self):
+        for name, data in super().get_tensors():
+            if 'gated_layers' in name:
+                d1 = data[:self.intermediate_size, :]
+                name1 = name.replace('gated_layers', 'gated_layers_w')
+                d2 = data[self.intermediate_size:, :]
+                name2 = name.replace('gated_layers', 'gated_layers_v')
+                yield name1, d1
+                yield name2, d2
+                continue
+
+            yield name, data
+
+
+JinaBertForMaskedLM = JinaBertModel
 
 
 ###### CONVERSION LOGIC ######
@@ -2816,6 +2805,7 @@ def main() -> None:
     print(f"Loading model: {dir_model.name}")
 
     hparams = Model.load_hparams(dir_model)
+
     with torch.inference_mode():
         model_class = Model.from_model_architecture(hparams["architectures"][0])
         model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file)
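
Note on the relocated JinaBertModel: its get_tensors() splits each fused
'gated_layers' checkpoint weight into the two tensors the GGUF expects,
'gated_layers_w' and 'gated_layers_v'. A minimal, self-contained sketch of
that split (the shapes and the random weight here are illustrative
assumptions, not values from a real checkpoint):

    import torch

    intermediate_size = 4  # assumed MLP width for this toy example
    hidden_size = 3        # assumed model width

    # The checkpoint packs both halves of the gated (GLU-style) MLP
    # projection into one weight of shape (2 * intermediate_size, hidden_size).
    gated = torch.randn(2 * intermediate_size, hidden_size)

    w = gated[:intermediate_size, :]  # emitted as '...gated_layers_w'
    v = gated[intermediate_size:, :]  # emitted as '...gated_layers_v'

    assert w.shape == v.shape == (intermediate_size, hidden_size)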