convert.py: add python logging instead of print()

parent 952d03dbea
commit 573dcecda1

1 changed file with 29 additions and 18 deletions

convert.py: 47 lines changed (+29, -18)
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 from __future__ import annotations
 
+import logging
 import argparse
 import concurrent.futures
 import enum
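Note: the calls added throughout this commit go through the root logger (logging.info, logging.warning, ...), configured once in main() via logging.basicConfig. As an aside, not part of the commit, a minimal sketch of the equivalent module-level-logger pattern that many projects prefer; the logger name "convert" is hypothetical, chosen only for this sketch:

    import logging

    logger = logging.getLogger("convert")  # hypothetical name, illustration only

    logging.basicConfig(level=logging.INFO)
    # With basicConfig's default format this prints "INFO:convert:...", so the
    # originating module is visible when several modules log at once.
    logger.info("message routed through a named logger")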
@@ -643,7 +644,7 @@ class LlamaHfVocab(Vocab):
 
 
 def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
-    # print( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
+    # logging.info( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
     if n_head_kv is not None and n_head != n_head_kv:
         n_head = n_head_kv
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
@@ -1033,12 +1034,12 @@ def check_vocab_size(params: Params, vocab: BaseVocab, pad_vocab: bool = False)
 
     # Check for a vocab size mismatch
     if params.n_vocab == vocab.vocab_size:
-        print("Ignoring added_tokens.json since model matches vocab size without it.")
+        logging.warning("Ignoring added_tokens.json since model matches vocab size without it.")
         return
 
     if pad_vocab and params.n_vocab > vocab.vocab_size:
         pad_count = params.n_vocab - vocab.vocab_size
-        print(
+        logging.debug(
             f"Padding vocab with {pad_count} token(s) - <dummy00001> through <dummy{pad_count:05}>"
         )
         for i in range(1, pad_count + 1):
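Note: the converted calls keep f-strings inside the logging functions. That works, but an f-string is evaluated even when the record is later filtered out by the level (e.g. a logging.debug call under the default INFO level). A small self-contained sketch of the stdlib's deferred %-style, shown here only as an aside, not as part of this commit:

    import logging

    logging.basicConfig(level=logging.INFO)
    pad_count = 3  # example value

    # Both lines are suppressed at INFO level, but they differ in cost:
    logging.debug(f"Padding vocab with {pad_count} token(s)")   # string built, then discarded
    logging.debug("Padding vocab with %d token(s)", pad_count)  # formatting skipped entirely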
@@ -1166,7 +1167,7 @@ class OutputFile:
             elapsed = time.time() - start
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
-            print(
+            logging.info(
                 f"[{i + 1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
             )
             self.gguf.write_tensor_data(ndarray)
@@ -1281,12 +1282,12 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
     # HF models permut or pack some of the tensors, so we need to undo that
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" in model:
-            print(f"Permuting layer {i}")
+            logging.debug(f"Permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
             # tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
         elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
-            print(f"Unpacking and permuting layer {i}")
+            logging.debug(f"Unpacking and permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
             tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
@@ -1299,15 +1300,15 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
         tensor_type, name_new = tmap.get_type_and_name(name, try_suffixes = (".weight", ".bias")) or (None, None)
         if name_new is None:
             if skip_unknown:
-                print(f"Unexpected tensor name: {name} - skipping")
+                logging.warning(f"Unexpected tensor name: {name} - skipping")
                 continue
             raise ValueError(f"Unexpected tensor name: {name}. Use --skip-unknown to ignore it (e.g. LLaVA)")
 
         if tensor_type in should_skip:
-            print(f"skipping tensor {name_new}")
+            logging.debug(f"skipping tensor {name_new}")
             continue
 
-        print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
+        logging.debug(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
         out[name_new] = lazy_tensor
 
     return out
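Note on the chosen levels: the substitutions in this commit are not uniform. Conditions a user may want to act on (an unexpected tensor name, an ignored added_tokens.json) become logging.warning, progress messages (loading files, writing output) become logging.info, and high-volume per-tensor detail becomes logging.debug, so it only appears under the new --verbose flag.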
@@ -1372,7 +1373,7 @@ def load_some_model(path: Path) -> ModelPlus:
         paths = find_multifile_paths(path)
         models_plus: list[ModelPlus] = []
         for path in paths:
-            print(f"Loading model file {path}")
+            logging.info(f"Loading model file {path}")
             models_plus.append(lazy_load_file(path))
 
         model_plus = merge_multifile_models(models_plus)
@@ -1413,7 +1414,7 @@ class VocabFactory:
         else:
             raise FileNotFoundError(f"Could not find a tokenizer matching any of {vocab_types}")
 
-        print(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
+        logging.info(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
         return vocab
 
     def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> tuple[BaseVocab, gguf.SpecialVocab]:
@@ -1473,8 +1474,18 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine")
     parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
     parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
+    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
 
     args = parser.parse_args(args_in)
+
+    if args.dump_single or args.dump:
+        # Avoid printing anything besides the dump output
+        logging.basicConfig(level=logging.CRITICAL)
+    elif args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
+
     if args.no_vocab and args.vocab_only:
         raise ValueError("--vocab-only does not make sense with --no-vocab")
 
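Note: the block above picks exactly one root-logger level per run: CRITICAL when dumping (so nothing but the dump output is printed), DEBUG with --verbose, INFO otherwise. A self-contained sketch of the same selection, with stand-in variables replacing the parsed args:

    import logging

    dump, verbose = False, True  # stand-ins for args.dump/args.dump_single and args.verbose

    if dump:
        logging.basicConfig(level=logging.CRITICAL)
    elif verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    logging.debug("visible only on the DEBUG path")
    logging.info("visible on the DEBUG and INFO paths")

One detail worth noting: logging.basicConfig() only takes effect the first time it is called in a process (later calls without force=True are ignored), so selecting the level once, up front in main(), is the right shape for this configuration.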
@@ -1491,6 +1502,7 @@ def main(args_in: list[str] | None = None) -> None:
     if args.dump:
         do_dump_model(model_plus)
         return
+
     endianess = gguf.GGUFEndian.LITTLE
     if args.big_endian:
         endianess = gguf.GGUFEndian.BIG
@@ -1513,7 +1525,7 @@ def main(args_in: list[str] | None = None) -> None:
         "q8_0": GGMLFileType.MostlyQ8_0,
     }[args.outtype]
 
-    print(f"params = {params}")
+    logging.info(f"params = {params}")
 
     model_parent_path = model_plus.paths[0].parent
     vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
@@ -1528,15 +1540,14 @@ def main(args_in: list[str] | None = None) -> None:
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
                                     endianess=endianess, pad_vocab=args.pad_vocab)
-        print(f"Wrote {outfile}")
+        logging.info(f"Wrote {outfile}")
         return
 
     if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
         vocab = model_plus.vocab
-
-    print(f"Vocab info: {vocab}")
-    print(f"Special vocab info: {special_vocab}")
+    logging.info(f"Vocab info: {vocab}")
+    logging.info(f"Special vocab info: {special_vocab}")
 
     model = model_plus.model
     model = convert_model_names(model, params, args.skip_unknown)
     ftype = pick_output_type(model, args.outtype)
@@ -1544,11 +1555,11 @@ def main(args_in: list[str] | None = None) -> None:
     outfile = args.outfile or default_outfile(model_plus.paths, ftype)
 
     params.ftype = ftype
-    print(f"Writing {outfile}, format {ftype}")
+    logging.info(f"Writing {outfile}, format {ftype}")
 
     OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
                          concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
-    print(f"Wrote {outfile}")
+    logging.info(f"Wrote {outfile}")
 
 
 if __name__ == '__main__':
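Note: with this change, a run such as python convert.py --verbose <model-dir> (the model path here is a hypothetical placeholder) emits the per-tensor logging.debug lines, a plain run stays at INFO, and a --dump or --dump-single run is silenced down to CRITICAL so only the dump itself is printed.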