convert-new.py : output gguf (WIP)

commit 250cf83847 (parent 5ec18934ad)

1 changed file with 36 additions and 20 deletions: convert-new.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import gguf
 import argparse
 import concurrent.futures
 import copy
@@ -118,6 +119,7 @@ class Params:
     n_mult:    int
     n_head:    int
     n_layer:   int
+    n_ctx:     int
     n_kv_head: Optional[int]  # This parameter is only used for Llama 2
 
     @staticmethod
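Note: with this hunk, Params carries the training context length alongside the other hyperparameters. A minimal sketch of the resulting dataclass, assuming the surrounding fields as they appear elsewhere in this diff (n_vocab and n_embd show up in the Params(...) constructors below):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Params:
        n_vocab:   int
        n_embd:    int
        n_mult:    int
        n_head:    int
        n_layer:   int
        n_ctx:     int            # new in this commit; -1 means "unknown"
        n_kv_head: Optional[int]  # This parameter is only used for Llama 2

The -1 sentinel matters: both Params.guessed() paths below fill in n_ctx = -1, and main() later refuses to proceed unless --ctx resolves it.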
@@ -145,6 +147,7 @@ class Params:
             n_mult    = 256,
             n_head    = n_head,
             n_layer   = n_layer,
+            n_ctx     = -1,
             n_kv_head = None,
         )
 
@@ -161,12 +164,21 @@ class Params:
 
         n_mult = find_n_mult(n_ff, n_embd);
 
+        if "max_sequence_length" in hparams:
+            n_ctx = hparams["max_sequence_length"]
+        elif "max_position_embeddings" in hparams:
+            n_ctx = hparams["max_position_embeddings"]
+        else:
+            raise Exception("failed to guess 'n_ctx'. This model is unknown or unsupported.\n"
+                            "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
+
         return Params(
             n_vocab   = n_vocab,
             n_embd    = n_embd,
             n_mult    = n_mult,
             n_head    = n_head,
             n_layer   = n_layer,
+            n_ctx     = n_ctx,
             n_kv_head = n_kv_head,
         )
 
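The lookup order here reflects the two spellings seen in practice: some config.json files carry max_sequence_length, while transformers-style configs carry max_position_embeddings. A standalone sketch of the same fallback, with hypothetical example dicts:

    def guess_n_ctx(hparams: dict) -> int:
        # Same two-key fallback as the hunk above; an unknown config raises
        # instead of letting a bogus n_ctx through.
        if "max_sequence_length" in hparams:
            return hparams["max_sequence_length"]
        if "max_position_embeddings" in hparams:
            return hparams["max_position_embeddings"]
        raise Exception("failed to guess 'n_ctx'. This model is unknown or unsupported.")

    print(guess_n_ctx({"max_sequence_length": 2048}))      # 2048, LLaMA v1 style
    print(guess_n_ctx({"max_position_embeddings": 4096}))  # 4096, LLaMA v2 / HF style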
@@ -191,6 +203,7 @@ class Params:
             n_mult    = n_mult,
             n_head    = n_head,
             n_layer   = n_layer,
+            n_ctx     = -1,
             n_kv_head = None,
         )
 
@@ -206,7 +219,6 @@ class Params:
         else:
             params = Params.guessed(model_plus.model)
 
-        print(f'params: n_vocab:{params.n_vocab} n_embd:{params.n_embd} n_mult:{params.n_mult} n_head:{params.n_head} n_layer:{params.n_layer}')
         return params
 
 
@@ -715,21 +727,14 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:
 
 class OutputFile:
     def __init__(self, fname_out: Path) -> None:
-        self.fout = open(fname_out, "wb")
+        self.gguf = gguf.GGUFWriter.open(fname_out)
 
     def write_file_header(self, params: Params, file_type: GGMLFileType) -> None:
-        self.fout.write(b"ggjt"[::-1])  # magic
-        values = [
-            1,  # file version
-            params.n_vocab,
-            params.n_embd,
-            params.n_mult,
-            params.n_head,
-            params.n_layer,
-            params.n_embd // params.n_head,  # rot (obsolete)
-            file_type.value,
-        ]
-        self.fout.write(struct.pack("i" * len(values), *values))
+        llm_arch = "llama"
+
+        self.gguf.add_architecture(llm_arch)
+        self.gguf.add_context_length(llm_arch, params.n_ctx)
+        self.gguf.add_embedding_length(llm_arch, params.n_embd)
 
     def write_tensor_header(self, name: str, shape: Sequence[int], data_type: DataType) -> None:
         sname = name.encode('utf-8')
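This is the heart of the commit: the hand-packed ggjt binary header (magic, version, and a fixed struct of hyperparameters) gives way to keyed metadata written through gguf.GGUFWriter. Only the three writer calls visible in this hunk are exercised in the sketch below; the gguf-py API is WIP at this commit, so treat the names as a snapshot rather than the final interface ("model.gguf" is a placeholder path):

    import gguf

    # Open a writer and record per-architecture metadata keys, mirroring
    # OutputFile.write_file_header above.
    llm_arch = "llama"

    out = gguf.GGUFWriter.open("model.gguf")
    out.add_architecture(llm_arch)
    out.add_context_length(llm_arch, 2048)    # params.n_ctx
    out.add_embedding_length(llm_arch, 4096)  # params.n_embd

Note that n_mult, n_head, n_layer, the obsolete rot field, and the file type are simply dropped here rather than ported; being WIP, the remaining keys presumably land in follow-up commits.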
@@ -873,7 +878,6 @@ def filter_and_sort_tensors(model: LazyModel) -> LazyModel:
 
 
 def load_vocab(path: Path, vocabtype: Optional[str]) -> Union[BpeVocab, SentencePieceVocab]:
-    print(f"vocabtype: {vocabtype}")
     # Be extra-friendly and accept either a file or a directory. Also, if it's
     # a directory, it might be the model directory, and tokenizer.model might
     # be in the parent of that.
@@ -893,7 +897,7 @@ def load_vocab(path: Path, vocabtype: Optional[str]) -> Union[BpeVocab, Sentence
                 f"Could not find tokenizer.model in {path} or its parent; "
                 "if it's in another directory, pass the directory as --vocab-dir")
     added_tokens_path = path.parent / "added_tokens.json"
-    print(f"Loading vocab file {path}")
+    print(f"Loading vocab file '{path}', type '{vocabtype}'")
     if vocabtype == "bpe":
         return BpeVocab(path, added_tokens_path if added_tokens_path.exists() else None)
     elif vocabtype == "spm":
@@ -933,21 +937,34 @@ def main(args_in: Optional[List[str]] = None) -> None:
     parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
     parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
     parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
-    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)")
+    parser.add_argument("--vocabtype", choices=["spm", "bpe"], help="vocab format (default: spm)", default="spm")
+    parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
     args = parser.parse_args(args_in)
 
     vocab: Vocab
     if args.dump_single:
         model_plus = lazy_load_file(args.model)
         do_dump_model(model_plus)
-    elif args.vocab_only:
+
+    model_plus = load_some_model(args.model)
+    params = Params.load(model_plus)
+    if params.n_ctx == -1:
+        if args.ctx is None:
+            raise Exception("The model doesn't have a context size, and you didn't specify one with --ctx\n"
+                            "Please specify one with --ctx:\n"
+                            " - LLaMA v1: --ctx 2048\n"
+                            " - LLaMA v2: --ctx 4096\n")
+        params.n_ctx = args.ctx
+
+    print(f"params = {params}")
+
+    if args.vocab_only:
         vocab = load_vocab(args.vocab_dir or args.model, args.vocabtype)
         assert args.outfile, "need --outfile if using --vocab-only"
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, vocab)
         print(f"Wrote {outfile}")
     else:
-        model_plus = load_some_model(args.model)
         if args.dump:
             do_dump_model(model_plus)
             return
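With model loading hoisted out of the else branch, params are resolved before either output path runs, and the new --ctx flag backfills n_ctx when the checkpoint has no config to guess from. Typical invocations (hypothetical model paths):

    python convert-new.py models/7B/                 # n_ctx guessed from config.json
    python convert-new.py models/7B/ --ctx 2048      # required for LLaMA v1 files without one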
@@ -957,7 +974,6 @@ def main(args_in: Optional[List[str]] = None) -> None:
             vocab_dir = args.vocab_dir if args.vocab_dir else model_plus.paths[0].parent
             vocab = load_vocab(vocab_dir, args.vocabtype)
 
-        params = Params.load(model_plus)
         model = model_plus.model
         model = do_necessary_conversions(model, params)
         output_type = pick_output_type(model, args.outtype)