From 443f7d586ee84cfb614b7f282836b86c5180dfa8 Mon Sep 17 00:00:00 2001 From: Galunid Date: Sun, 29 Oct 2023 20:00:54 +0100 Subject: [PATCH] Call add_tensor before write_* functions --- convert-generic.py | 15 ++++++++++----- model.py | 13 ++++++++++++- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/convert-generic.py b/convert-generic.py index 344a31dcf..bd366d250 100755 --- a/convert-generic.py +++ b/convert-generic.py @@ -54,14 +54,19 @@ print("gguf: get gpt2 tokenizer vocab") model_instance.set_vocab() # write model -print("gguf: write header") -model_instance.gguf_writer.write_header_to_file() -print("gguf: write metadata") -model_instance.gguf_writer.write_kv_data_to_file() if not args.vocab_only: - print("gguf: write tensors") model_instance.write_tensors() + print("gguf: write header") + model_instance.gguf_writer.write_header_to_file() + print("gguf: write metadata") + model_instance.gguf_writer.write_kv_data_to_file() + print("gguf: write tensors") model_instance.gguf_writer.write_tensors_to_file() +else: + print("gguf: write header") + model_instance.gguf_writer.write_header_to_file() + print("gguf: write metadata") + model_instance.gguf_writer.write_kv_data_to_file() model_instance.gguf_writer.close() diff --git a/model.py b/model.py index c35fe3bbf..a3c53fcc0 100644 --- a/model.py +++ b/model.py @@ -190,7 +190,18 @@ class StableLMModel(Model): class GPTNeoXModel(Model): - pass + def set_gguf_parameters(self): + block_count = self.hparams["num_hidden_layers"] + + self.gguf_writer.add_name(self.dir_model.name) + self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"]) + self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) + self.gguf_writer.add_block_count(block_count) + self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"]) + self.gguf_writer.add_rope_dimension_count(int(self.hparams["rotary_pct"]*(self.hparams["hidden_size"]//self.hparams["num_attention_heads"]))) + self.gguf_writer.add_head_count(self.hparams["num_attention_heads"]) + self.gguf_writer.add_parallel_residual(self.hparams["use_parallel_residual"] if "use_parallel_residual" in self.hparams else True) + self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"]) class BloomModel(Model): def set_gguf_parameters(self):