From 3db04db2b839cd16b79ca76e213e4e9da22a25bb Mon Sep 17 00:00:00 2001
From: Phillip Kravtsov
Date: Fri, 29 Sep 2023 14:59:51 -0700
Subject: [PATCH] update conversion script to directly take adept artifacts
 rather than .safetensors file

---
 ...to-gguf.py => convert-persimmon-to-gguf.py | 78 +++++++++----------
 1 file changed, 38 insertions(+), 40 deletions(-)
 rename convert-persimmon-st-to-gguf.py => convert-persimmon-to-gguf.py (65%)

diff --git a/convert-persimmon-st-to-gguf.py b/convert-persimmon-to-gguf.py
similarity index 65%
rename from convert-persimmon-st-to-gguf.py
rename to convert-persimmon-to-gguf.py
index f8fcbb4bd..25c8a5963 100644
--- a/convert-persimmon-st-to-gguf.py
+++ b/convert-persimmon-to-gguf.py
@@ -1,28 +1,31 @@
-from convert import lazy_load_safetensors_file
-import sys
 import torch
-from safetensors import safe_open
-from pathlib import Path
+import os
 from pprint import pprint
-from sentencepiece import SentencePieceProcessor
+import sys
 import argparse
+from pathlib import Path
+from sentencepiece import SentencePieceProcessor
+if 'NO_LOCAL_GGUF' not in os.environ:
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
 import gguf
-import json
-import struct
 
-def file_is_safetensors(path: Path) -> bool:
-    fp = open(path, 'rb')
-    first8 = fp.read(8)
-    fp.seek(0)
-    if first8[:2] == b'PK':
-        # A zip file, i.e. PyTorch format
-        return False
-    return struct.unpack('<Q', first8)[0] < 16 * 1024 * 1024
[...]
 def main() -> None:
-    args = get_args()
-    assert file_is_safetensors(args.model), 'Error: model file is not a SafeTensors file'
-    dir_model = args.model.parent
-    with open(dir_model / 'config.json', 'r') as f:
-        hparams = json.load(f)
     arch = gguf.MODEL_ARCH.PERSIMMON
     gguf_writer = gguf.GGUFWriter(args.outfile, gguf.MODEL_ARCH_NAMES[arch])
-    block_count = hparams['num_layers']
-    head_count = hparams['num_attention_heads']
+    block_count = hparams.num_layers
+    head_count = hparams.num_attention_heads
     head_count_kv = head_count
-    ctx_length = hparams['seq_length']
-    hidden_size = hparams['hidden_size']
+    ctx_length = hparams.seq_length
+    hidden_size = hparams.hidden_size
     gguf_writer.add_name('persimmon-8b-chat')
     gguf_writer.add_context_length(ctx_length)
     gguf_writer.add_embedding_length(hidden_size)
     gguf_writer.add_block_count(block_count)
-    gguf_writer.add_feed_forward_length(hparams['ffn_hidden_size'])
+    gguf_writer.add_feed_forward_length(hparams.ffn_hidden_size)
     gguf_writer.add_rope_dimension_count(hidden_size // head_count)
     gguf_writer.add_head_count(head_count)
     gguf_writer.add_head_count_kv(head_count_kv)
-    gguf_writer.add_rope_freq_base(hparams['rotary_emb_base'])
-    gguf_writer.add_layer_norm_eps(hparams['layernorm_epsilon'])
-    tokens, scores, toktypes = get_tokenizer_info(dir_model)
+    gguf_writer.add_rope_freq_base(hparams.rotary_emb_base)
+    gguf_writer.add_layer_norm_eps(hparams.layernorm_epsilon)
+    tokens, scores, toktypes = get_tokenizer_info(args.model_dir)
     gguf_writer.add_tokenizer_model('llama')
     gguf_writer.add_token_list(tokens)
     gguf_writer.add_token_scores(scores)
@@ -98,10 +100,6 @@ def main() -> None:
     tensor_map = gguf.get_tensor_name_map(arch, block_count)
     print(tensor_map)
 
-    tensors = {}
-    with safe_open(args.model, framework="pt") as f:
-        for k in f.keys():
-            tensors[k] = f.get_tensor(k)
     for name in tensors.keys():
         data = tensors[name]
         if name.endswith(".self_attention.rotary_emb.inv_freq"):
@@ -132,4 +130,4 @@ def main() -> None:
 
 
 if __name__ == '__main__':
-    main()
+    main()
\ No newline at end of file
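
The loop in the final hunk consumes a flat `tensors` dict keyed by dotted
parameter names. The hunks that populate that dict are not shown above, but
the commit message implies the script now loads the Adept release checkpoint
directly with torch.load and flattens its nested state dict, rather than
reading a .safetensors file. A minimal sketch of that idea, assuming the
checkpoint is a nested dict of tensors saved via torch.save; the file name
'model_optim_rng.pt' and the top-level 'model' key are hypothetical, not
taken from this patch:

import torch

def flatten_tensors(dct, out, prefix=None):
    # Recursively walk a nested state dict, joining keys with '.' so that
    # {'layers': {'0': {'weight': t}}} flattens to {'layers.0.weight': t}.
    for key, value in dct.items():
        name = key if prefix is None else prefix + '.' + key
        if isinstance(value, torch.Tensor):
            out[name] = value
        elif isinstance(value, dict):
            flatten_tensors(value, out, name)
        else:
            raise ValueError(f'unexpected entry of type {type(value)}')

checkpoint = torch.load('model_optim_rng.pt', map_location='cpu')  # hypothetical file name
tensors = {}
flatten_tensors(checkpoint['model'], tensors)  # 'model' key is an assumption

Each flattened name can then be renamed to its GGUF equivalent through
gguf.get_tensor_name_map(arch, block_count), exactly as the surviving loop
in the patch does.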