diff --git a/convert-persimmon-st-to-gguf.py b/convert-persimmon-st-to-gguf.py
deleted file mode 100644
index f8fcbb4bd..000000000
--- a/convert-persimmon-st-to-gguf.py
+++ /dev/null
@@ -1,135 +0,0 @@
-from convert import lazy_load_safetensors_file
-import sys
-import torch
-from safetensors import safe_open
-from pathlib import Path
-from pprint import pprint
-from sentencepiece import SentencePieceProcessor
-import argparse
-import gguf
-import json
-import struct
-
-def file_is_safetensors(path: Path) -> bool:
-    fp = open(path, 'rb')
-    first8 = fp.read(8)
-    fp.seek(0)
-    if first8[:2] == b'PK':
-        # A zip file, i.e. PyTorch format
-        return False
-    return struct.unpack('<Q', first8)[0] < 16 * 1024 * 1024
-
-def main() -> None:
-    args = get_args()
-    assert file_is_safetensors(args.model), 'Error: model file is not a SafeTensors file'
-    dir_model = args.model.parent
-    with open(dir_model / 'config.json', 'r') as f:
-        hparams = json.load(f)
-    arch = gguf.MODEL_ARCH.PERSIMMON
-    gguf_writer = gguf.GGUFWriter(args.outfile, gguf.MODEL_ARCH_NAMES[arch])
-
-    block_count = hparams['num_layers']
-    head_count = hparams['num_attention_heads']
-    head_count_kv = head_count
-    ctx_length = hparams['seq_length']
-    hidden_size = hparams['hidden_size']
-
-    gguf_writer.add_name('persimmon-8b-chat')
-    gguf_writer.add_context_length(ctx_length)
-    gguf_writer.add_embedding_length(hidden_size)
-    gguf_writer.add_block_count(block_count)
-    gguf_writer.add_feed_forward_length(hparams['ffn_hidden_size'])
-    gguf_writer.add_rope_dimension_count(hidden_size // head_count)
-    gguf_writer.add_head_count(head_count)
-    gguf_writer.add_head_count_kv(head_count_kv)
-    gguf_writer.add_rope_freq_base(hparams['rotary_emb_base'])
-    gguf_writer.add_layer_norm_eps(hparams['layernorm_epsilon'])
-    tokens, scores, toktypes = get_tokenizer_info(dir_model)
-    gguf_writer.add_tokenizer_model('llama')
-    gguf_writer.add_token_list(tokens)
-    gguf_writer.add_token_scores(scores)
-    gguf_writer.add_token_types(toktypes)
-    gguf_writer.add_bos_token_id(71013)
-    gguf_writer.add_eos_token_id(71013)
-
-    tensor_map = gguf.get_tensor_name_map(arch, block_count)
-    print(tensor_map)
-    tensors = {}
-    with safe_open(args.model, framework="pt") as f:
-        for k in f.keys():
-            tensors[k] = f.get_tensor(k)
-    for name in tensors.keys():
-        data = tensors[name]
-        if name.endswith(".self_attention.rotary_emb.inv_freq"):
-            continue
-        old_dtype = data.dtype
-        # TODO: FP16 conversion produces garbage outputs. (Q8_0 does not, so..?)
-        data = data.to(torch.float32).squeeze().numpy()
-        new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
-        if new_name is None:
-            print("Can not map tensor '" + name + "'")
-            sys.exit()
-        n_dims = len(data.shape)
-        print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
-
-        gguf_writer.add_tensor(new_name, data)
-    print("gguf: write header")
-    gguf_writer.write_header_to_file()
-    print("gguf: write metadata")
-    gguf_writer.write_kv_data_to_file()
-    print("gguf: write tensors")
-    gguf_writer.write_tensors_to_file()
-
-    gguf_writer.close()
-
-    print(f"gguf: model successfully exported to '{args.outfile}'")
-    print("")
-
-
-
-if __name__ == '__main__':
-    main()
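`main()` above leans on two helpers: `get_args()` for the command-line interface and `get_tokenizer_info(dir_model)` for the SentencePiece vocabulary that feeds `add_token_list`, `add_token_scores`, and `add_token_types`. Below is a minimal sketch of what such helpers can look like; the argument layout, the `adept_vocab.model` filename, and both function bodies are illustrative assumptions, not the script's original implementation.

```python
import argparse
from pathlib import Path

from sentencepiece import SentencePieceProcessor


def get_args_sketch() -> argparse.Namespace:
    # Hypothetical CLI exposing the args.model / args.outfile attributes used in main().
    parser = argparse.ArgumentParser(description='Convert a Persimmon SafeTensors checkpoint to GGUF')
    parser.add_argument('model', type=Path, help='path to the .safetensors checkpoint')
    parser.add_argument('--outfile', type=Path, required=True, help='path of the GGUF file to write')
    return parser.parse_args()


def get_tokenizer_info_sketch(dir_model: Path) -> tuple[list[bytes], list[float], list[int]]:
    # Assumed vocabulary filename next to the checkpoint.
    tokenizer = SentencePieceProcessor(str(dir_model / 'adept_vocab.model'))
    tokens: list[bytes] = []
    scores: list[float] = []
    toktypes: list[int] = []
    for i in range(tokenizer.vocab_size()):
        tokens.append(tokenizer.id_to_piece(i).encode('utf-8'))
        scores.append(tokenizer.get_score(i))
        # Map SentencePiece piece kinds onto GGUF token types
        # (NORMAL=1, UNKNOWN=2, CONTROL=3, UNUSED=5, BYTE=6).
        toktype = 1
        if tokenizer.is_unknown(i):
            toktype = 2
        if tokenizer.is_control(i):
            toktype = 3
        if tokenizer.is_unused(i):
            toktype = 5
        if tokenizer.is_byte(i):
            toktype = 6
        toktypes.append(toktype)
    return tokens, scores, toktypes
```

The three parallel lists mirror how `main()` unpacks `tokens, scores, toktypes` before handing them to the GGUF writer; the BOS/EOS id 71013 and the `'llama'` tokenizer model name are taken verbatim from the diff.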