From 3c7cb413fbc049484e940e04837dbf4b3e4506fd Mon Sep 17 00:00:00 2001
From: tpoisonooo
Date: Fri, 17 Mar 2023 16:53:53 +0800
Subject: [PATCH] improvement(tools): optimize with argparse

---
 convert-pth-to-ggml.py | 241 ++++++++++++++++++++++-------------
 1 file changed, 129 insertions(+), 112 deletions(-)

diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py
index 5c36e9c09..df630c0cd 100644
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -22,19 +22,27 @@ import json
 import struct
 import numpy as np
 import torch
+import argparse
+import os
+
 from sentencepiece import SentencePieceProcessor
 
-if len(sys.argv) < 3:
-    print("Usage: convert-ckpt-to-ggml.py dir-model ftype\n")
-    print("  ftype == 0 -> float32")
-    print("  ftype == 1 -> float16")
-    sys.exit(1)
 
-# output in the same directory as the model
-dir_model = sys.argv[1]
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Convert ckpt models to ggml models.')
+    parser.add_argument('dir_model',
+                        type=str,
+                        help='Directory path of the checkpoint model')
+    parser.add_argument('ftype',
+                        type=str,
+                        choices=['f32', 'f16'],
+                        help='Data type of the converted tensor, f32 or f16')
+    parser.add_argument('out_dir',
+                        type=str,
+                        help='Directory path for storing ggml model')
+    return parser.parse_args()
 
-fname_hparams = sys.argv[1] + "/params.json"
-fname_tokenizer = sys.argv[1] + "/../tokenizer.model"
 
 def get_n_parts(dim):
     if dim == 4096:
@@ -49,129 +57,138 @@ def get_n_parts(dim):
         print("Invalid dim: " + str(dim))
         sys.exit(1)
 
-# possible data types
-#   ftype == 0 -> float32
-#   ftype == 1 -> float16
-#
-# map from ftype to string
-ftype_str = ["f32", "f16"]
 
-ftype = 1
-if len(sys.argv) > 2:
-    ftype = int(sys.argv[2])
-    if ftype < 0 or ftype > 1:
-        print("Invalid ftype: " + str(ftype))
-        sys.exit(1)
-    fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"
+def main():
+    args = parse_args()
+    dir_model = args.dir_model
+    out_dir = args.out_dir
 
-with open(fname_hparams, "r") as f:
-    hparams = json.load(f)
+    if not os.path.exists(out_dir):
+        os.mkdir(out_dir)
 
-tokenizer = SentencePieceProcessor(fname_tokenizer)
+    ftype = args.ftype
+    ftype_int = {'f32': 0, 'f16': 1}
+    fname_hparams = os.path.join(dir_model, 'params.json')
+    fname_tokenizer = os.path.join(dir_model, '..', 'tokenizer.model')
 
-hparams.update({"vocab_size": tokenizer.vocab_size()})
+    with open(fname_hparams, "r") as f:
+        hparams = json.load(f)
 
-n_parts = get_n_parts(hparams["dim"])
+    tokenizer = SentencePieceProcessor(fname_tokenizer)
 
-print(hparams)
-print('n_parts = ', n_parts)
+    hparams.update({"vocab_size": tokenizer.vocab_size()})
 
-for p in range(n_parts):
-    print('Processing part ', p)
+    n_parts = get_n_parts(hparams["dim"])
 
-    #fname_model = sys.argv[1] + "/consolidated.00.pth"
-    fname_model = sys.argv[1] + "/consolidated.0" + str(p) + ".pth"
-    fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"
-    if (p > 0):
-        fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin" + "." + str(p)
+    print(hparams)
+    print('n_parts = ', n_parts)
 
-    model = torch.load(fname_model, map_location="cpu")
+    for p in range(n_parts):
+        print('Processing part ', p)
 
-    fout = open(fname_out, "wb")
-
-    fout.write(struct.pack("i", 0x67676d6c)) # magic: ggml in hex
-    fout.write(struct.pack("i", hparams["vocab_size"]))
-    fout.write(struct.pack("i", hparams["dim"]))
-    fout.write(struct.pack("i", hparams["multiple_of"]))
-    fout.write(struct.pack("i", hparams["n_heads"]))
-    fout.write(struct.pack("i", hparams["n_layers"]))
-    fout.write(struct.pack("i", hparams["dim"] // hparams["n_heads"])) # rot (obsolete)
-    fout.write(struct.pack("i", ftype))
-
-    # Is this correct??
-    for i in range(tokenizer.vocab_size()):
-        if tokenizer.is_unknown(i):
-            # "<unk>" token (translated as ??)
-            text = " \u2047 ".encode("utf-8")
-            fout.write(struct.pack("i", len(text)))
-            fout.write(text)
-        elif tokenizer.is_control(i):
-            # "<s>"/"</s>" tokens
-            fout.write(struct.pack("i", 0))
-        elif tokenizer.is_byte(i):
-            # "<U+XX>" tokens (which may be invalid UTF-8)
-            piece = tokenizer.id_to_piece(i)
-            if len(piece) != 6:
-                print("Invalid token: " + piece)
-                sys.exit(1)
-            byte_value = int(piece[3:-1], 16)
-            fout.write(struct.pack("i", 1))
-            fout.write(struct.pack("B", byte_value))
+        #fname_model = sys.argv[1] + "/consolidated.00.pth"
+        fname_model = os.path.join(dir_model, "consolidated.0{}.pth".format(p))
+        if p > 0:
+            fname_out = os.path.join(out_dir,
+                                     "ggml-model-{}.bin.{}".format(ftype, p))
         else:
-            # normal token. Uses U+2581 (LOWER ONE EIGHTH BLOCK) to represent spaces.
-            text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
-            fout.write(struct.pack("i", len(text)))
-            fout.write(text)
+            fname_out = os.path.join(out_dir,
+                                     "ggml-model-{}.bin".format(ftype))
 
-    for k, v in model.items():
-        name = k
-        shape = v.shape
+        model = torch.load(fname_model, map_location="cpu")
 
-        # skip layers.X.attention.inner_attention.rope.freqs
-        if name[-5:] == "freqs":
-            continue
+        fout = open(fname_out, "wb")
 
-        print("Processing variable: " + name + " with shape: ", shape, " and type: ", v.dtype)
+        fout.write(struct.pack("i", 0x67676d6c))  # magic: ggml in hex
+        fout.write(struct.pack("i", hparams["vocab_size"]))
+        fout.write(struct.pack("i", hparams["dim"]))
+        fout.write(struct.pack("i", hparams["multiple_of"]))
+        fout.write(struct.pack("i", hparams["n_heads"]))
+        fout.write(struct.pack("i", hparams["n_layers"]))
+        fout.write(struct.pack("i", hparams["dim"] //
+                               hparams["n_heads"]))  # rot (obsolete)
+        fout.write(struct.pack("i", ftype_int[ftype]))
 
-        #data = tf.train.load_variable(dir_model, name).squeeze()
-        data = v.numpy().squeeze()
-        n_dims = len(data.shape);
+        # Is this correct??
+        for i in range(tokenizer.vocab_size()):
+            if tokenizer.is_unknown(i):
+                # "<unk>" token (translated as ??)
+                text = " \u2047 ".encode("utf-8")
+                fout.write(struct.pack("i", len(text)))
+                fout.write(text)
+            elif tokenizer.is_control(i):
+                # "<s>"/"</s>" tokens
+                fout.write(struct.pack("i", 0))
+            elif tokenizer.is_byte(i):
+                # "<U+XX>" tokens (which may be invalid UTF-8)
+                piece = tokenizer.id_to_piece(i)
+                if len(piece) != 6:
+                    print("Invalid token: " + piece)
+                    sys.exit(1)
+                byte_value = int(piece[3:-1], 16)
+                fout.write(struct.pack("i", 1))
+                fout.write(struct.pack("B", byte_value))
+            else:
+                # normal token. Uses U+2581 (LOWER ONE EIGHTH BLOCK) to represent spaces.
+                text = tokenizer.id_to_piece(i).replace("\u2581",
+                                                        " ").encode("utf-8")
+                fout.write(struct.pack("i", len(text)))
+                fout.write(text)
 
-        # for efficiency - transpose some matrices
-        # "model/h.*/attn/c_attn/w"
-        # "model/h.*/attn/c_proj/w"
-        # "model/h.*/mlp/c_fc/w"
-        # "model/h.*/mlp/c_proj/w"
-        #if name[-14:] == "/attn/c_attn/w" or \
-        #   name[-14:] == "/attn/c_proj/w" or \
-        #   name[-11:] == "/mlp/c_fc/w" or \
-        #   name[-13:] == "/mlp/c_proj/w":
-        #    print("  Transposing")
-        #    data = data.transpose()
+        for k, v in model.items():
+            name = k
+            shape = v.shape
 
-        dshape = data.shape
+            # skip layers.X.attention.inner_attention.rope.freqs
+            if name[-5:] == "freqs":
+                continue
 
-        # default type is fp16
-        ftype_cur = 1
-        if ftype == 0 or n_dims == 1:
-            print("  Converting to float32")
-            data = data.astype(np.float32)
-            ftype_cur = 0
+            print("Processing variable: " + name + " with shape: ", shape,
+                  " and type: ", v.dtype)
 
-        # header
-        sname = name.encode('utf-8')
-        fout.write(struct.pack("iii", n_dims, len(sname), ftype_cur))
-        for i in range(n_dims):
-            fout.write(struct.pack("i", dshape[n_dims - 1 - i]))
-        fout.write(sname);
+            #data = tf.train.load_variable(dir_model, name).squeeze()
+            data = v.numpy().squeeze()
+            n_dims = len(data.shape)
 
-        # data
-        data.tofile(fout)
+            # for efficiency - transpose some matrices
+            # "model/h.*/attn/c_attn/w"
+            # "model/h.*/attn/c_proj/w"
+            # "model/h.*/mlp/c_fc/w"
+            # "model/h.*/mlp/c_proj/w"
+            #if name[-14:] == "/attn/c_attn/w" or \
+            #   name[-14:] == "/attn/c_proj/w" or \
+            #   name[-11:] == "/mlp/c_fc/w" or \
+            #   name[-13:] == "/mlp/c_proj/w":
+            #    print("  Transposing")
+            #    data = data.transpose()
 
-    # I hope this deallocates the memory ..
-    model = None
+            dshape = data.shape
 
-    fout.close()
+            # default type is fp16
+            ftype_cur = 1
+            if ftype == 'f32' or n_dims == 1:
+                print("  Converting to float32")
+                data = data.astype(np.float32)
+                ftype_cur = 0
 
-    print("Done. Output file: " + fname_out + ", (part ", p, ")")
-    print("")
+            # header
+            sname = name.encode('utf-8')
+            fout.write(struct.pack("iii", n_dims, len(sname), ftype_cur))
+            for i in range(n_dims):
+                fout.write(struct.pack("i", dshape[n_dims - 1 - i]))
+            fout.write(sname)
+
+            # data
+            data.tofile(fout)
+
+        # I hope this deallocates the memory ..
+        model = None
+
+        fout.close()
+
+        print("Done. Output file: " + fname_out + ", (part ", p, ")")
+        print("")
+
+
+if __name__ == '__main__':
+    main()
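
Usage, for reference (the paths below are illustrative; this assumes the
usual llama.cpp layout where the checkpoint directory, e.g. models/7B,
holds params.json and the consolidated.0*.pth shards, with tokenizer.model
one directory above):

    # before: ftype was a bare integer (0 -> float32, 1 -> float16) and the
    # output was always written next to the model
    #   python convert-pth-to-ggml.py models/7B 1
    # after: ftype is a named choice and the output directory is an explicit
    # third positional argument (created if it does not exist)
    python convert-pth-to-ggml.py models/7B f16 models/7B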